################## Data Merging #######################

## First, load the original country data.

library(foreign)
setwd(dirname(file.choose()))
country_data <- read.dta("xcountry_data.dta")
View(country_data)
region_data <- read.dta("xregion_data (1).dta")
View(region_data)

## Clean the data
## Find the important NA's.
which(is.na(country_data$tyr05_n))
[1]  6 23

## Exclude those countries from the analysis
country_data_all <- country_data
country_data <- country_data_all[-c(6, 23), ]
which(is.na(country_data$tyr05_n))
## integer(0)

## Now check out HDI data.
hdi <- read.csv("hdi_14.csv")
View(hdi)

## Add the sheet for country names and codes so the sheets are easy to match.
country_names <- read.csv("country_names.csv")
View(country_names)

## Now start merging the datasets.
m1 <- as.data.frame(hdi)
m2 <- as.data.frame(country_names)
m3 <- merge(m1, m2, by = intersect(names(m1),names(m2)))
View(m3) ## Make sure we did this right.

## Now add this to the original dataset.
m1 <- m3
m2 <- as.data.frame(country_data)
m3 <- merge(m1, m2, by = intersect(names(m1),names(m2)))
View(m3)
## It worked! We can add it to our new sheet.
country_data_combo <- m3

## Now let's try to get the most recent GDP data. Note that we should be using the
## GDP per capita (PPP basis).

gdp_14 <- read.csv("gdp_14.csv")
View(gdp_14)
m1 <- as.data.frame(gdp_14)
m2 <- as.data.frame(country_data_combo)
m3 <- merge(m1, m2, by = intersect(names(m1),names(m2)))
View(m3) ## Make sure we did this right.
## It worked! We can add it to our new sheet.
country_data_combo <- m3
View(country_data_combo)

## This is exciting. Now we can add the gini coefficient data.
gini <- read.csv("gini.csv")
View(gini)
m1 <- as.data.frame(gini)
m2 <- as.data.frame(country_data_combo)
m3 <- merge(m1, m2, by = intersect(names(m1),names(m2)))
View(m3)
## It worked! We can add it to our new sheet.
country_data_combo <- m3
View(country_data_combo)

## Note that we have to take the log of gdp, so we'll do that for the 2014 data.
country_data_combo$gdp_14 <- log(country_data_combo$gdp_14_net)
## Don't forget to control for dummy_tradex when regressing this, to account for a
## different source of data (trade exchange vs world bank) for 5 of the data points.

## Now we can make this into a csv to share with Pamela.
write.csv(country_data_combo, file = "country_data_combo.csv")

## This is pretty exciting. Now lets check out the conflict data
conflict <- read.csv("conflict_data.csv")
View(conflict)
## Note that yrs_conflict is the total number of conflict observations, with one
## observation made per conflict per year.

## Now add the conflict data.
m1 <- as.data.frame(conflict)
m2 <- as.data.frame(country_data_combo)
m3 <- merge(m1, m2, by = intersect(names(m1),names(m2)))
View(m3)
## It worked! We can add it to our new sheet.
country_data_combo <- m3
View(country_data_combo)

## Now lets look at our data on the environment. 
evi_data <- read.csv("evi_data.csv")
View(evi_data)

## Now add the environmental vulnerability data to the dataset.
m1 <- as.data.frame(evi_data)
m2 <- as.data.frame(country_data_combo)
m3 <- merge(m1, m2, by = intersect(names(m1),names(m2)))
View(m3)
## It worked! We can add it to our new sheet.
country_data_combo <- m3
View(country_data_combo)

## Now add the polity data.
polity_score <- read.csv("polity_score.csv")
View(polity_score)
m1 <- as.data.frame(polity_score)
m2 <- as.data.frame(country_data_combo)
m3 <- merge(m1, m2, by = intersect(names(m1),names(m2)))
View(m3)
## It worked! We can add it to our new sheet.
country_data_combo <- m3
View(country_data_combo)

## Now try adding the corruption data.
cpi <- read.csv("cpi.csv")
cpi <- cpi[,2:3]
View(cpi)
m1 <- as.data.frame(cpi)
m2 <- as.data.frame(country_data_combo)
m3 <- merge(m1, m2, by = intersect(names(m1),names(m2)))
View(m3)
## It worked! We can add it to our new sheet.
country_data_combo <- m3
View(country_data_combo)

country_data_combo$ave_m_elev <- as.numeric(country_data_combo$ave_m_elev)
country_data_combo$gini_year <- as.numeric(country_data_combo$gini_year)
country_data_combo$gini_source <- as.numeric(country_data_combo$gini_source)
country_data_combo$gini_coeff <- as.numeric(country_data_combo$gini_coeff)

## Now merge in years of war.
yrs_war <- read.csv("yrs_war.csv")
View(yrs_war)
m1 <- as.data.frame(yrs_war)
m2 <- as.data.frame(country_data_combo)
m3 <- merge(m1, m2, by = intersect(names(m1),names(m2)))
View(m3)
## It worked! We can add it to our new sheet.
country_data_combo <- m3
View(country_data_combo)
country_data_combo$yrs_war <- as.numeric(country_data_combo$yrs_war)

## Now merge in years of independence.
yrs_ind <- read.csv("yrs_ind.csv")
View(yrs_ind)
m1 <- as.data.frame(yrs_ind)
m2 <- as.data.frame(country_data_combo)
m3 <- merge(m1, m2, by = intersect(names(m1),names(m2)))
View(m3)
## It worked! We can add it to our new sheet.
country_data_combo <- m3
View(country_data_combo)
country_data_combo$yrs_ind <- as.numeric(country_data_combo$yrs_ind)

## Now load and merge the governance indicators from the world bank.
wgi_14 <- read.csv("wgi_14.csv")
View(wgi_14)
m1 <- as.data.frame(wgi_14)
m2 <- as.data.frame(country_data_combo)
m3 <- merge(m1, m2, by = intersect(names(m1),names(m2)))
View(m3)
## It worked! We can add it to our new sheet.
country_data_combo <- m3
View(country_data_combo)
country_data_combo$rule_law <- as.numeric(country_data_combo$rule_law)
country_data_combo$ctrl_corr <- as.numeric(country_data_combo$ctrl_corr)

## Add the new mortality data.
new_mort <- read.csv("new_mort.csv")
country_data_combo <- read.csv("country_data_combo.csv")
View(new_mort)
m1 <- as.data.frame(new_mort)
m2 <- as.data.frame(country_data_combo)
m3 <- merge(m1, m2, by = intersect(names(m1),names(m2)))
View(m3)
## Fix the issue with an extra X column.
m3 <- m3[, -c(4)]
View(m3)
## It worked! We can add it to our new sheet.
country_data_combo <- m3
View(country_data_combo)
country_data_combo$mort_est <- as.numeric(country_data_combo$mort_est)
country_data_combo$mort_source <- as.factor(country_data_combo$mort_source)

## Write the final copy of the csv.
write.csv(country_data_combo, file = "country_data_combo.csv")

write.csv(cd, file = "Gov_2001_Final_Data.csv")


################## Model Testing ######################

## Load the data.
setwd(dirname(file.choose()))
cd <- read.csv("country_data_combo.csv")
View(cd)
library(AER)
library(mgcv)
require(ggplot2)
require(gridExtra)
require(car)
require(sandwich)
require(mgcv)
require(MASS)

## First, we test the linearity of the relationship between GDP, education, and rule of
## law, for both the authors' data and for our data.

GDP_ed_auth.gam <- gam(logpgdp05 ~ s(tyr05_n), data = cd)
summary(GDP_ed_auth.gam)
plot(GDP_ed_auth.gam,
     main="GAM Plot for GDP in 2005 vs Education", 
     xlab="Mean Years Education")
## The gam plot looks linear, and the edf is 1.

GDP_ed_our.gam <- gam(gdp_14 ~ s(school_mean_14), data = cd)
summary(GDP_ed_our.gam)
plot(GDP_ed_our.gam,
     main="GAM Plot for GDP in 2014 vs Education", 
     xlab="Mean Years Education")
## Our gam plot looks linear, and the edf is 1.

GDP_law_auth.gam <- gam(logpgdp05 ~ s(ruleoflaw), data = cd)
summary(GDP_law_auth.gam)
plot(GDP_law_auth.gam,
     main="GAM Plot for GDP in 2005 vs Rule of Law", 
     xlab="Rule of Law")
## This relationship is a little funkier. The gam plot is not completely straight,
## and the edf is 1.493.

## Let's try plotting the relationship to see what it looks like.
## First let's make it easy to add labels--the country code.
code <- as.character(cd$code)
gdp_law_auth.plot <- ggplot(aes(x = ruleoflaw, y = logpgdp05, label=code), data=cd) +
  geom_point(shape = 18) +
  geom_text(aes(label=code)) +
  geom_smooth(fill = "mediumorchid4", colour="yellow2", method = "loess", fullrange=T, size=0.5) +
  xlab("Rule of Law in 2005 ") + 
  ylab("Log GDP per capita, PPP, 2005 ") +
  ggtitle("Log GDP per Capita (PPP) against Rule of Law in 2005 ") +
  theme(panel.background = element_rect(fill = 'lightsteelblue', colour = 'white'))
gdp_law_auth.plot
## There is some heteroskedasticity, but the relationship is roughly linear.
## Venezuela and Zaire have a value for rule of law that is less than -1.5, but
## of those, Venezuela is doing better than expected (above the curve).

GDP_law_our.gam <- gam(gdp_14 ~ s(rule_law), data = cd)
summary(GDP_law_our.gam)
plot(GDP_law_our.gam,
     main="GAM Plot for GDP in 2014 vs Rule of Law", 
     xlab="Rule of Law")
## The relationship is no longer linear. It now appears to be quadratic. The gam
## Plot looks like a parabola. The edf is 2.572.

## Let's try plotting the relationship to see what it looks like.
gdp_law_our.plot <- ggplot(aes(x = rule_law, y = gdp_14,label=code), data=cd) +
  geom_point(shape = 18) +
  geom_text(aes(label=code)) +
  geom_smooth(fill = "mediumorchid4", colour="yellow2", method = "loess", fullrange=T, size=0.5) +
  xlab("Rule of Law in 2014 ") + 
  ylab("Log GDP per capita, PPP, 2014 ") +
  ggtitle("Log GDP per Capita (PPP) against Rule of Law in 2014 ") +
  theme(panel.background = element_rect(fill = 'lightsteelblue', colour = 'white'))
gdp_law_our.plot
## There is heteroskedasticity and a nonlinear (parabolic-ish) relationship.
## It looks as though now there are more countries in which the rule of law has
## completely broken down. But in those countries, the gdp per capita is actually
## higher than it was before.

## I wonder if corruption is having a big impact on rule of law and messing things up.
## We can try regressing the two, making a gam plot, and graphing the relationship
## to see what might be going on there.

law_corr_reg <- ivreg(rule_law ~ ctrl_corr, data=cd)
coeftest(law_corr_reg, vcov = vcovHC(law_corr_reg, "HC1"))
summary(law_corr_reg)
##  ctrl_corr   0.978924   0.083479 11.7265   <2e-16 ***
## So the two are pretty closely related; the coefficient is practically 1.
##  Residual standard error: 23.89 on 60 degrees of freedom
##  Multiple R-Squared: 0.722,	Adjusted R-squared: 0.7173 
##  Wald test: 155.8 on 1 and 60 DF,  p-value: < 2.2e-16 

law_corr.gam <- gam(rule_law ~ s(ctrl_corr), data=cd)
summary(law_corr.gam)
##                 edf Ref.df    F p-value    
##  s(ctrl_corr) 5.857  7.017 30.3  <2e-16 ***
## So, the two are closely related, but the relationship is pretty nonlinear.
plot(law_corr.gam,
     main="GAM Plot for Rule of Law vs Control of Corruption in 2014", 
     xlab="Control of Corruption")
## The relationship is nonlinear and kinda funky looking.

## Let's graph the relationship and see if we can figure out the issue is.
law_corr.plot <- ggplot(aes(x = ctrl_corr, y = rule_law,label=code), data=cd) +
  geom_point(shape = 18) +
  geom_text(aes(label=code)) +
  geom_smooth(fill = "mediumorchid4", colour="yellow2", method = "loess", fullrange=T, size=0.5) +
  xlab("Control of Corruption in 2014 ") + 
  ylab("Rule of Law in 2014 ") +
  ggtitle("Rule of Law vs Control of Corruption in 2014 ") +
  theme(panel.background = element_rect(fill = 'lightsteelblue', colour = 'white'))
law_corr.plot

## Perhaps we can create a new variable relating rule of law to corruption and
## regress gdp on that to see what happens.

nvar1 <- cd$rule_law/cd$ctrl_corr
gdp_nvar1.reg <- ivreg(gdp_14 ~ nvar1, data=cd)
coeftest(gdp_nvar1.reg, vcov = vcovHC(gdp_nvar1.reg, "HC1"))
##              Estimate Std. Error t value Pr(>|t|)    
##  (Intercept) 8.920204   0.154426 57.7636   <2e-16 ***
##  nvar1       0.014096   0.019856  0.7099   0.4805
summary(gdp_nvar1.reg)
##  Residual standard error: 1.125 on 60 degrees of freedom
##  Multiple R-Squared: 0.001129,	Adjusted R-squared: -0.01552 
##  Wald test: 0.06783 on 1 and 60 DF,  p-value: 0.7954
gdp_nvar1.gam <- gam(gdp_14 ~ s(nvar1), data=cd)
summary(gdp_nvar1.gam)
##           edf Ref.df     F p-value
##  s(nvar1)   1      1 0.068   0.795
plot(gdp_nvar1.gam)
## Straight line with probalby a lot of heteroskedasicity.
gdp_nvar1.plot <- ggplot(aes(x = nvar1, y = gdp_14,label=code), data=cd) +
  geom_point(shape = 18) +
  geom_text(aes(label=code)) +
  geom_smooth(fill = "mediumorchid4", colour="yellow2", method = "loess", fullrange=T, size=0.5) +
  xlab("nvar1 ") + 
  ylab("gdp ") +
  ggtitle("gdp vs nvar1 in 2014 ") +
  theme(panel.background = element_rect(fill = 'lightsteelblue', colour = 'white'))
gdp_nvar1.plot
## Our whole plot is getting super messed up by Ghana and South Africa, which have
## an okay rule of law but very low control of corruption compared to other countries.
## Maybe we can take out those to see what the relationship is for the others.
law_drop <- cd$rule_law[-c(21,61)]
corr_drop <- cd$ctrl_corr[-c(21,61)]
nvar2 <- law_drop / corr_drop
gdp_drop <- cd$gdp_14[-c(21,61)]
gdp_nvar2.reg <- ivreg(gdp_drop ~ nvar2, data=cd)
coeftest(gdp_nvar2.reg, vcov = vcovHC(gdp_nvar2.reg, "HC1"))
## (Intercept) 8.906912   0.294090 30.2864   <2e-16 ***
## nvar2       0.035749   0.283132  0.1263      0.9
gdp_nvar2.gam <- gam(gdp_drop ~ s(nvar2), data=cd)
summary(gdp_nvar2.gam)
plot(gdp_nvar2.gam)
## Straight line with probably a lot of heteroskedasicity.
plot(y=gdp_drop, x=nvar2)
## No relationship.

## Let's regres gdp on control of corruption and rule of law to see what happens.
gdp_law_corr.reg <- ivreg(gdp_14 ~ rule_law + ctrl_corr, data=cd)
coeftest(gdp_law_corr.reg, vcov = vcovHC(gdp_law_corr.reg, "HC1"))
##  rule_law    0.0087723  0.0043243  2.0286  0.04702 *  
##  ctrl_corr   0.0041685  0.0044642  0.9338  0.35423

## This is probably not the way we want to go. Let's check out other relationships
## between gdp and rule of law.
gdp_law_poly.reg <- ivreg(gdp_14 ~ rule_law + I(rule_law^2), data=cd)
summary(gdp_law_poly.reg)
##                  Estimate Std. Error t value Pr(>|t|)    
##  (Intercept)    9.333e+00  2.668e-01  34.987  < 2e-16 ***
##  rule_law      -3.278e-02  8.437e-03  -3.885 0.000262 ***
##  I(rule_law^2)  2.814e-04  5.118e-05   5.499 8.62e-07 ***
## The relationship is pretty clearly polynomial.

## I wonder if that's the case for the authors' data, too:
gdp_law_auth_poly.reg <- ivreg(logpgdp05 ~ ruleoflaw + I(ruleoflaw^2), data=cd)
summary(gdp_law_auth_poly.reg)
##                 Estimate Std. Error t value Pr(>|t|)    
##  (Intercept)      8.4113     0.1780  47.265  < 2e-16 ***
##  ruleoflaw        0.8645     0.1347   6.417  2.6e-08 ***
##  I(ruleoflaw^2)   0.1813     0.1302   1.393    0.169    
## The relationship is less clearly quadratic in the case of the 2005 data, but
## it may be there.

## In any case, the takeaway conclusion seems to be that for a rule of law above
## about 50, gdp per capita increases. So there's sort of a tipping point beyond
## which one would expect to see gains.

## But where is this nonlinearity coming from? Let's try regressing rule of law
## in 2014 on its instruments to see if the relationship is linear or not.
law_mort_pden.reg <- ivreg(rule_law ~ lcapped + lpd1500s, data=cd)
coeftest(law_mort_pden.reg, vcov = vcovHC(law_mort_pden.reg, "HC1"))
##              Estimate Std. Error t value  Pr(>|t|)    
##  (Intercept) 159.0103    24.3575  6.5282 1.693e-08 ***
##  lcapped     -20.9872     5.4615 -3.8428 0.0003001 ***
##  lpd1500s     -9.2539     3.4356 -2.6935 0.0091908 **
law_mort_pden.gam <- gam(rule_law ~ s(lcapped) + s(lpd1500s), data = cd)
summary(law_mort_pden.gam)
##                edf Ref.df     F  p-value    
##  s(lcapped)  5.074  5.977 8.562 7.06e-07 ***
##  s(lpd1500s) 1.000  1.000 5.166   0.0269 * 
plot(law_mort_pden.gam)

## The relationship appears to be linear with population density and nonlinear with
## settler mortality.

## This is an issue similar to the one we found with the authors' data from 2005.
## I wonder if corruption is maybe a better proxy?
corr_mort_pden.reg <- ivreg(ctrl_corr ~ lcapped + lpd1500s, data=cd)
coeftest(corr_mort_pden.reg, vcov = vcovHC(law_mort_pden.reg, "HC1"))
corr_mort_pden.gam <- gam(ctrl_corr ~ s(lcapped) + s(lpd1500s), data = cd)
summary(corr_mort_pden.gam)
## Interesting. It looks like the relationship is quadratic with mortality.
##                edf Ref.df     F p-value   
##  s(lcapped)  2.207  2.691 5.087 0.00416 **
##  s(lpd1500s) 1.506  1.876 3.740 0.03145 *
plot(corr_mort_pden.gam)
## These graphs are a bit less funky than before. I wonder if there is a way we
## can sort out the issues between corruption and rule of law, in order to fix
## the relationship.

## Let's start out looking at the relationship between population density and
## settler mortality. Maybe one predicts the other. If that's the case, then we
## might be able to instrument just on the population density data, and it might
## improve our model somewhat.

dens_mort.reg <- ivreg(lcapped ~ lpd1500s, data=cd)
summary(dens_mort.reg)
## So, it's somewhat predictive, but not great:
##              Estimate Std. Error t value Pr(>|t|)    
##  (Intercept)  4.33423    0.12021  36.057  < 2e-16 ***
##  lpd1500s     0.20319    0.06686   3.039  0.00351 ** 
##  Residual standard error: 0.9019 on 60 degrees of freedom
##  Multiple R-Squared: 0.1334,	Adjusted R-squared: 0.119 
##  Wald test: 9.236 on 1 and 60 DF,  p-value: 0.003512

## Let's see what happens when we control for other known factors.
dens_mort_full.reg <- ivreg(lcapped ~ lpd1500s + lat_abst + asia + africa
                            + america + f_french + f_brit, data=cd)
summary(dens_mort_full.reg)
## So yes, about .64 the variation in mortality is predicted by the other control
## variables. So maybe we can take it out of the equation.
##              Estimate Std. Error t value Pr(>|t|)    
##  (Intercept)  3.64725    0.42728   8.536 1.36e-11 ***
##  lpd1500s     0.13129    0.04791   2.740 0.008304 ** 
##  lat_abst    -2.36650    0.62810  -3.768 0.000409 ***
##  asia         0.64855    0.41046   1.580 0.119930    
##  africa       1.56619    0.39812   3.934 0.000240 ***
##  america      1.20570    0.38800   3.107 0.003007 ** 
##  f_french     0.30652    0.23027   1.331 0.188743    
##  f_brit      -0.35687    0.20662  -1.727 0.089842 .  
##  Residual standard error: 0.5722 on 54 degrees of freedom
##  Multiple R-Squared: 0.6861,	Adjusted R-squared: 0.6454 
##  Wald test: 16.86 on 7 and 54 DF,  p-value: 1.404e-11

##  Let's see how the relationship between rule of law and population density changes
##  when we take mortality out of the regression.
law_pdens_mort.reg <- ivreg(rule_law ~ lcapped + lpd1500s, data=cd)
summary(law_pdens_mort.reg)
##  Estimate Std. Error t value Pr(>|t|)    
##  (Intercept)  159.010     21.610   7.358 6.69e-10 ***
##  lcapped      -20.987      4.875  -4.305 6.38e-05 ***
##  lpd1500s      -9.254      2.712  -3.413  0.00117 ** 
##  Residual standard error: 34.06 on 59 degrees of freedom
##  Multiple R-Squared: 0.4445,	Adjusted R-squared: 0.4257 
##  Wald test: 23.61 on 2 and 59 DF,  p-value: 2.937e-08

law_pdens.reg <- ivreg(rule_law ~ lpd1500s, data=cd)
summary(law_pdens.reg)
## Population density becomes more significant as a predictor for rule of law when
## we take out settler mortality, probably because the two covary. The R squared
## value drops by almost half, though, so there is somewhat of a tradeoff. Leaving
## off mortality will help us not violate the linearity assumption though.

##              Estimate Std. Error t value Pr(>|t|)    
##  (Intercept)    68.05       5.16  13.188  < 2e-16 ***
##  lpd1500s      -13.52       2.87  -4.711 1.51e-05 ***
##  Residual standard error: 38.71 on 60 degrees of freedom
##  Multiple R-Squared:  0.27,	Adjusted R-squared: 0.2578 
##  Wald test: 22.19 on 1 and 60 DF,  p-value: 1.507e-05

## Let's try it with the other control variables and see if we are missing much.
law_dens_mort_full.reg <- ivreg(rule_law ~ lpd1500s + lcapped + lat_abst + africa
                                + asia + america + f_brit + f_french, data=cd)
summary(law_dens_mort_full.reg)
##  Estimate Std. Error t value Pr(>|t|)    
##  (Intercept)  141.462     36.494   3.876 0.000294 ***
##  lpd1500s     -12.603      2.850  -4.423 4.87e-05 ***
##  lcapped       -7.824      7.583  -1.032 0.306864    
##  lat_abst      53.505     39.333   1.360 0.179491    
##  africa       -39.816     25.164  -1.582 0.119535    
##  asia         -42.732     23.395  -1.827 0.073400 .  
##  america      -61.523     23.475  -2.621 0.011420 *  
##  f_brit         5.702     11.827   0.482 0.631729    
##  f_french     -17.747     13.041  -1.361 0.179312    
##  Residual standard error: 31.89 on 53 degrees of freedom
##  Multiple R-Squared: 0.5626,	Adjusted R-squared: 0.4965 
##  Wald test:  8.52 on 8 and 53 DF,  p-value: 2.279e-07

## This may actually be helpful because settler mortality isn't even significant
## anymore, but population density is. Let's take out mortality from the regression
## and see what happens to the coefficient on population density and the r value.

law_dens_full.reg <- ivreg(rule_law ~ lpd1500s + lat_abst + africa
                           + asia + america + f_brit + f_french, data=cd)
summary(law_dens_full.reg)
##              Estimate Std. Error t value Pr(>|t|)    
##  (Intercept)  112.926     23.824   4.740 1.60e-05 ***
##  lpd1500s     -13.630      2.672  -5.102 4.46e-06 ***
##  lat_abst      72.020     35.021   2.056  0.04458 *  
##  africa       -52.070     22.198  -2.346  0.02269 *  
##  asia         -47.807     22.886  -2.089  0.04144 *  
##  america      -70.956     21.634  -3.280  0.00182 ** 
##  f_brit         8.494     11.520   0.737  0.46413    
##  f_french     -20.145     12.839  -1.569  0.12249    
##  Residual standard error: 31.9 on 54 degrees of freedom
##  Multiple R-Squared: 0.5538,	Adjusted R-squared: 0.4959 
##  Wald test: 9.574 on 7 and 54 DF,  p-value: 1.128e-07 

## The R value is only smaller by about a thousandth, the coefficient on population
## density increases, and so does its significance.

## Now let's run the full stage regression with our 2014 data on gdp as in Table 6,
## and see how our results compare to before.

T6_M4_S2_our.new <- ivreg(gdp_14 ~ dummy_dennis + lat_abst + africa + asia + america + f_french + f_brit
                          + school_mean_14 + rule_law | prienr1900 + protmiss
                          + lpd1500s + dummy_dennis + lat_abst + africa + asia + america + f_french + f_brit,
                          data=cd)
coeftest(T6_M4_S2_our.new, vcov = vcovHC(T6_M4_S2_our.new, "HC1"))
##                   Estimate Std. Error t value  Pr(>|t|)    
##  (Intercept)     7.0553176  0.6159603 11.4542 7.787e-16 ***
##  dummy_dennis   -0.1094545  0.4950116 -0.2211   0.82587    
##  lat_abst        0.6816495  0.8313314  0.8199   0.41599    
##  africa         -0.3410205  0.4622545 -0.7377   0.46399    
##  asia            0.2595827  0.2941006  0.8826   0.38150    
##  america         0.1836508  0.2643272  0.6948   0.49028    
##  f_french        0.0970768  0.3837381  0.2530   0.80128    
##  f_brit         -0.0150684  0.3045896 -0.0495   0.96073    
##  school_mean_14  0.1982728  0.0979169  2.0249   0.04803 *  
##  rule_law        0.0063178  0.0053414  1.1828   0.24227
## Education is important, and rule of law is not. But that's also the result we
## got before. Let's try it with the authors 2005 data and see what happens.
summary(T6_M4_S2_our.new)
##  Residual standard error: 0.6279 on 52 degrees of freedom
##  Multiple R-Squared: 0.7302,	Adjusted R-squared: 0.6835 
##  Wald test: 12.37 on 9 and 52 DF,  p-value: 3.424e-10
##  (Note the R squared is 0.68.)

T6_M4_S2_authors.new <- ivreg(logpgdp05 ~ dummy_dennis + lat_abst + africa + asia + america + f_french + f_brit
                              + tyr05_n + ruleoflaw | prienr1900 + protmiss
                              + lpd1500s + dummy_dennis + lat_abst + africa + asia + america + f_french + f_brit,
                              data=cd)
coeftest(T6_M4_S2_authors.new, vcov = vcovHC(T6_M4_S2_authors.new, "HC1"))
##                 Estimate Std. Error t value  Pr(>|t|)    
##  (Intercept)   7.5291828  1.1494433  6.5503 2.579e-08 ***
##  dummy_dennis -0.2726456  0.7047981 -0.3868    0.7005    
##  lat_abst     -0.2859826  1.1161193 -0.2562    0.7988    
##  africa       -0.0147733  0.4667726 -0.0316    0.9749    
##  asia          0.1634698  0.4098992  0.3988    0.6917    
##  america       0.5748560  0.4754683  1.2090    0.2321    
##  f_french      0.0085445  0.3235087  0.0264    0.9790    
##  f_brit       -0.2621763  0.3254688 -0.8055    0.4242    
##  tyr05_n       0.1585430  0.1505613  1.0530    0.2972    
##  ruleoflaw     0.8577011  0.5487896  1.5629    0.1241
summary(T6_M4_S2_authors.new)
##  Residual standard error: 0.6888 on 52 degrees of freedom
##  Multiple R-Squared: 0.7252,	Adjusted R-squared: 0.6777 
##  Wald test: 12.48 on 9 and 52 DF,  p-value: 2.929e-10

##  Removing log settler mortality seems to break their results. Rule of law no
##  longer appears significant. However, the model is still about as predictive as
##  before (.678 instead of .684). It would be great if we could get some form of
##  data that had a linear relationship and was better than the settler mortality
##  data.

## I merged in new mortality data. Let's see what happens when we input it into our
## regression with rule of law.
law_nmort_pdens.reg <- ivreg(rule_law ~ mort_est + lpd1500s, data=cd)
summary(law_nmort_pdens.reg)
## Not significant with just these 2, but maybe if we control for the other factors.
law_nmort_pdens_full.reg <- ivreg(rule_law ~ mort_est + lpd1500s + asia + africa
                                  + america + f_french + f_brit + lat_abst, data=cd)
summary(law_nmort_pdens_full.reg)
##              Estimate Std. Error t value Pr(>|t|)    
##  (Intercept) 111.919761  25.430375   4.401 5.24e-05 ***
##  mort_est      0.008191   0.067397   0.122  0.90372    
##  lpd1500s    -13.532932   2.812353  -4.812 1.28e-05 ***
##  asia        -48.172606  23.292961  -2.068  0.04352 *  
##  africa      -52.851324  23.307770  -2.268  0.02746 *  
##  america     -70.964630  21.834086  -3.250  0.00201 ** 
##  f_french    -20.164019  12.959104  -1.556  0.12567    
##  f_brit        8.833362  11.957626   0.739  0.46333    
##  lat_abst     73.378476  37.070347   1.979  0.05297 .
##  Residual standard error: 32.2 on 53 degrees of freedom
##  Multiple R-Squared: 0.5539,	Adjusted R-squared: 0.4866 
##  Wald test: 8.226 on 8 and 53 DF,  p-value: 3.67e-07
##  Now mortality isn't significant at all to rule of law! That's...not good.

## Let's see what happens if we regress it with the authors' values.
law_nmort_pdens_auth.reg <- ivreg(ruleoflaw ~ mort_est + lpd1500s, data=cd)
summary(law_nmort_pdens_auth.reg)
##               Estimate Std. Error t value Pr(>|t|)   
##  (Intercept)  0.205839   0.176448   1.167  0.24808   
##  mort_est    -0.003596   0.001213  -2.966  0.00436 **
##  lpd1500s    -0.199933   0.057940  -3.451  0.00104 **
##  Residual standard error: 0.7815 on 59 degrees of freedom
##  Multiple R-Squared: 0.2626,	Adjusted R-squared: 0.2376 
##  Wald test: 10.51 on 2 and 59 DF,  p-value: 0.0001249
## This is less significant than before, but maybe it will help resolve the model
## problems. Let's fit a gam and find out.

law_nmort_pdens_auth.gam <- gam(ruleoflaw ~ s(mort_est) + s(lpd1500s), data=cd)
summary(law_nmort_pdens_auth.gam)
##  Approximate significance of smooth terms:
##                edf Ref.df     F p-value   
##  s(mort_est) 4.118  4.963 4.511 0.00158 **
##  s(lpd1500s) 2.073  2.637 4.816 0.00703 **

## Well, this is interesting. Now we're getting a possibly quadratic relationship for
## the logged population density, and the edf for mortality is 4. Let's check out
## the plots.
plot(law_nmort_pdens_auth.gam, xlab="Log Settler Mortality, Revised Data",
     ylab="Rule of Law, 2005",
     main="Figure 4: Rule of Law Regressed on Settler Mortality and Population Density")
plot(law_nmort_pdens_auth.gam, xlab="Log Population Density, 1500s",
     ylab="Rule of Law, 2005",
     main="Figure 5: Rule of Law Regressed on Settler Mortality and Population Density")

## Now mortality is messed up in a different way but more roughly parabolic.
## And the plot of the logged population density is kinda parabolic but less
## problematically so. Let's try fitting a parametric model.

law_mort_auth_poly.reg <- ivreg(ruleoflaw ~ mort_est + I(mort_est^2), data=cd)
summary(law_mort_auth_poly.reg)
##                  Estimate Std. Error t value Pr(>|t|)    
##  (Intercept)    9.959e-01  2.477e-01   4.021 0.000167 ***
##  mort_est      -2.384e-02  4.386e-03  -5.435 1.09e-06 ***
##  I(mort_est^2)  7.220e-05  1.516e-05   4.762 1.29e-05 ***
##  The relationship is pretty clearly quadratic, which messes up our regressions.
law_pdens_auth_poly.reg <- ivreg(ruleoflaw ~ lpd1500s + I(lpd1500s^2), data=cd)
summary(law_pdens_auth_poly.reg)
##                Estimate Std. Error t value Pr(>|t|)    
##  (Intercept)   -0.54478    0.10636  -5.122 3.49e-06 ***
##  lpd1500s      -0.26151    0.05087  -5.141 3.25e-06 ***
##  I(lpd1500s^2)  0.11149    0.01949   5.720 3.76e-07 ***
##  This relationship is also quadratic, which I thought means we shouldn't be able
##  to use it in our first stage regressions. I found another paper though that
##  says we should be able to include a quadratic term in our first stage regressions.
## So let's try that and see what happens.

law_linear.reg <- ivreg(ruleoflaw ~ lpd1500s + mort_est, data=cd)
summary(law_linear.reg)
##  Estimate Std. Error t value Pr(>|t|)   
##  (Intercept)  0.205839   0.176448   1.167  0.24808   
##  lpd1500s    -0.199933   0.057940  -3.451  0.00104 **
##  mort_est    -0.003596   0.001213  -2.966  0.00436 **
##  Residual standard error: 0.7815 on 59 degrees of freedom
##  Multiple R-Squared: 0.2626,	Adjusted R-squared: 0.2376 
##  Wald test: 10.51 on 2 and 59 DF,  p-value: 0.0001249 

law_quad.reg <- ivreg(ruleoflaw ~ lpd1500s + I(lpd1500s^2) + mort_est
                      + I(mort_est^2), data=cd)
summary(law_quad.reg)
##                  Estimate Std. Error t value Pr(>|t|)    
##  (Intercept)    3.214e-01  2.790e-01   1.152 0.254193    
##  lpd1500s      -2.092e-01  5.010e-02  -4.176 0.000103 ***
##  I(lpd1500s^2)  7.534e-02  2.120e-02   3.553 0.000772 ***
##  mort_est      -1.390e-02  4.374e-03  -3.178 0.002396 ** 
##  I(mort_est^2)  4.179e-05  1.462e-05   2.858 0.005936 ** 
##  Residual standard error: 0.6257 on 57 degrees of freedom
##  Multiple R-Squared: 0.5434,	Adjusted R-squared: 0.5114 
##  Wald test: 16.96 on 4 and 57 DF,  p-value: 3.265e-09

## So, we get much more significant results when we include the quadratic terms.

## Let's see if this method improves our results with the 2014 data also.
law_linear_14.reg <- ivreg(rule_law ~ lpd1500s + mort_est, data=cd)
summary(law_linear_14.reg)
##  Estimate Std. Error t value Pr(>|t|)    
##  (Intercept)  79.27059    8.62626   9.189 5.53e-13 ***
##  lpd1500s    -13.44947    2.83258  -4.748 1.35e-05 ***
##  mort_est     -0.09555    0.05928  -1.612    0.112    
##  Residual standard error: 38.21 on 59 degrees of freedom
##  Multiple R-Squared: 0.3008,	Adjusted R-squared: 0.2771 
##  Wald test: 12.69 on 2 and 59 DF,  p-value: 2.607e-05

law_quad_14.reg <- ivreg(rule_law ~ lpd1500s + I(lpd1500s^2) + mort_est
                         + I(mort_est^2), data=cd)
summary(law_quad_14.reg)
##                  Estimate Std. Error t value Pr(>|t|)    
##  (Intercept)    9.617e+01  1.532e+01   6.278 5.01e-08 ***
##  lpd1500s      -1.278e+01  2.751e+00  -4.647 2.04e-05 ***
##  I(lpd1500s^2)  1.810e+00  1.164e+00   1.555  0.12555    
##  mort_est      -6.732e-01  2.402e-01  -2.803  0.00690 ** 
##  I(mort_est^2)  2.184e-03  8.027e-04   2.721  0.00862 ** 
##  Residual standard error: 34.35 on 57 degrees of freedom
##  Multiple R-Squared: 0.454,	Adjusted R-squared: 0.4156 
##  Wald test: 11.85 on 4 and 57 DF,  p-value: 4.522e-07

law_quad_14a.reg <- ivreg(rule_law ~ lpd1500s + I(lpd1500s^2) + mort_est
                          + I(mort_est^2) + prienr1900 + protmiss, data=cd)
coeftest(law_quad_14a.reg, vcov = vcovHC(law_quad_14a.reg, "HC1"))
summary(law_quad_14a.reg)

## So, yes, our results went from being insignificant in the linear model for
## mortality, to being significant at the .001 or .01 level for all variables.

## Now let's check out the relationship between gdp and rule of law. We know from
## the gam plots that the relationship was not linear. Let's see if adding a
## quadratic term helps.

gdp_law_lin.reg <- ivreg(logpgdp05 ~ ruleoflaw, data=cd)
summary(gdp_law_lin.reg)
##              Estimate Std. Error t value Pr(>|t|)    
##  (Intercept)   8.5949     0.1205  71.345  < 2e-16 ***
##  ruleoflaw     0.9296     0.1273   7.301 7.67e-10 ***
##  Residual standard error: 0.8902 on 60 degrees of freedom
##  Multiple R-Squared: 0.4704,	Adjusted R-squared: 0.4616 
##  Wald test:  53.3 on 1 and 60 DF,  p-value: 7.666e-10

gdp_law_quad.reg <- ivreg(logpgdp05 ~ ruleoflaw + I(ruleoflaw^2), data=cd)
summary(gdp_law_quad.reg)
##                 Estimate Std. Error t value Pr(>|t|)    
##  (Intercept)      8.4113     0.1780  47.265  < 2e-16 ***
##  ruleoflaw        0.8645     0.1347   6.417  2.6e-08 ***
##  I(ruleoflaw^2)   0.1813     0.1302   1.393    0.169    
##  Residual standard error: 0.8833 on 59 degrees of freedom
##  Multiple R-Squared: 0.4873,	Adjusted R-squared: 0.4699 
##  Wald test: 28.04 on 2 and 59 DF,  p-value: 2.76e-09

## So, it looks like the linear model is a little better for the 2004 data. Let's
## check whether that's the case for the 2014 data.

gdp_law_lin_14.reg <- ivreg(gdp_14 ~ rule_law, data=cd)
summary(gdp_law_lin_14.reg)
##              Estimate Std. Error t value Pr(>|t|)    
##  (Intercept) 8.221110   0.212146  38.752  < 2e-16 ***
##  rule_law    0.011847   0.002818   4.204 8.85e-05 ***
##  Residual standard error: 0.989 on 60 degrees of freedom
##  Multiple R-Squared: 0.2276,	Adjusted R-squared: 0.2147 
##  Wald test: 17.68 on 1 and 60 DF,  p-value: 8.854e-05 

gdp_law_quad_14.reg <- ivreg(gdp_14 ~ rule_law + I(rule_law^2), data=cd)
summary(gdp_law_quad_14.reg)
##                  Estimate Std. Error t value Pr(>|t|)    
##  (Intercept)    9.333e+00  2.668e-01  34.987  < 2e-16 ***
##  rule_law      -3.278e-02  8.437e-03  -3.885 0.000262 ***
##  I(rule_law^2)  2.814e-04  5.118e-05   5.499 8.62e-07 ***
##  Residual standard error: 0.8109 on 59 degrees of freedom
##  Multiple R-Squared: 0.4893,	Adjusted R-squared: 0.472 
##  Wald test: 28.27 on 2 and 59 DF,  p-value: 2.458e-09

## When we update to the 2014 data, it appears that that use of a quadratic
## relationship is clearly better.

## We didn't have any problems with the relationship between gdp and education for
## either year, according to our gam plots, so let's check the instruments for
## education to see whether they are linear or not.

ed_instr.reg <- ivreg(tyr05_n ~ protmiss + prienr1900, data=cd)
summary(ed_instr.reg)
##  Estimate Std. Error t value Pr(>|t|)    
##  (Intercept)  4.21400    0.35065  12.018  < 2e-16 ***
##  protmiss     0.87443    0.43408   2.014   0.0485 *  
##  prienr1900   0.09389    0.01031   9.110  7.5e-13 ***
##  Residual standard error: 1.855 on 59 degrees of freedom
##  Multiple R-Squared: 0.5984,	Adjusted R-squared: 0.5848 
##  Wald test: 43.96 on 2 and 59 DF,  p-value: 2.048e-12

ed_instr.gam <- gam(tyr05_n ~ s(protmiss) + s(prienr1900), data=cd)
summary(ed_instr.gam)
##                  edf Ref.df      F  p-value    
##  s(protmiss)   1.000   1.00  3.494   0.0666 .  
##  s(prienr1900) 2.768   3.37 27.505 2.17e-13 ***
plot(ed_instr.gam)

## This is interesting. It appears that the relationship between education and
## protestant missionaries is linear, but the relationship between current education
## and primary school enrollment might not be. Let's see if adding a quadratic term
## improves it.

ed_instr_quad.reg <- ivreg(tyr05_n ~ protmiss + prienr1900 + I(prienr1900^2), data=cd)
summary(ed_instr_quad.reg)
##                    Estimate Std. Error t value Pr(>|t|)    
##  (Intercept)      3.9893653  0.3848225  10.367 8.03e-15 ***
##  protmiss         0.6940283  0.4506017   1.540 0.128944    
##  prienr1900       0.1375526  0.0334977   4.106 0.000128 ***
##  I(prienr1900^2) -0.0005253  0.0003837  -1.369 0.176315    
##  Residual standard error: 1.841 on 58 degrees of freedom
##  Multiple R-Squared: 0.611,	Adjusted R-squared: 0.5909 
##  Wald test: 30.37 on 3 and 58 DF,  p-value: 6.247e-12

## The R value is smaller and the coefficients are less significant, so it looks
## like adding a quadratic term did not substantially improve the model. Let's check
## the 2014 education data to see what the relationship there is.

ed_instr_14.reg <- ivreg(school_mean_14 ~ protmiss + prienr1900, data=cd)
summary(ed_instr_14.reg)
##              Estimate Std. Error t value Pr(>|t|)    
##  (Intercept)  5.04753    0.37337  13.519  < 2e-16 ***
##  protmiss     1.28613    0.46221   2.783  0.00723 ** 
##  prienr1900   0.08709    0.01097   7.936 7.03e-11 ***
##  Residual standard error: 1.975 on 59 degrees of freedom
##  Multiple R-Squared: 0.5487,	Adjusted R-squared: 0.5334 
##  Wald test: 35.87 on 2 and 59 DF,  p-value: 6.39e-11

ed_instr_14.gam <- gam(school_mean_14 ~ s(protmiss) + s(prienr1900), data=cd)
summary(ed_instr_14.gam)
plot(ed_instr_14.gam)
##                  edf Ref.df      F  p-value    
##  s(protmiss)   1.000  1.000  7.035   0.0103 *  
##  s(prienr1900) 2.703  3.288 20.660 2.34e-10 ***
## It looks like the relatiohship is still linear with respect to protestant
## missionaries and somewhat nonlinear with respect to primary school enrollment
## in 1900.

## Let's add a quadratic term to see if it improves the relationship.
ed_instr_quad_14.reg <- ivreg(school_mean_14 ~ protmiss + prienr1900
                              + I(prienr1900^2), data=cd)
summary(ed_instr_quad_14.reg)
##                    Estimate Std. Error t value Pr(>|t|)    
##  (Intercept)      4.8790285  0.4130805  11.811   <2e-16 ***
##  protmiss         1.1507977  0.4836899   2.379   0.0207 *  
##  prienr1900       0.1198385  0.0359574   3.333   0.0015 ** 
##  I(prienr1900^2) -0.0003940  0.0004119  -0.957   0.3427    
##  Residual standard error: 1.976 on 58 degrees of freedom
##  Multiple R-Squared: 0.5558,	Adjusted R-squared: 0.5328 
##  Wald test: 24.19 on 3 and 58 DF,  p-value: 2.808e-10

## It looks like adding a quadratic term didn't improve the model. So let's keep
## it linear.

## Now, we can try running a 2-stage regression with all our covariates, including
## the quadratic terms, and see what happens.

rule_law_2 <- I(cd$rule_law^2)
mort_est_2 <- I(cd$mort_est^2)
lpd1500s_2 <- I(cd$lpd1500s^2)
TSLS_law_quad <- ivreg(gdp_14 ~ rule_law + rule_law_2 | mort_est + mort_est_2
                       + lpd1500s + lpd1500s_2, data=cd)
coeftest(TSLS_law_quad, vcov = vcovHC(TSLS_law_quad, "HC1"))
summary(TSLS_law_quad)
##                 Estimate  Std. Error t value Pr(>|t|)    
##  (Intercept)  9.96759887  1.11215581  8.9624 1.32e-12 ***
##  rule_law    -0.05831195  0.03717400 -1.5686  0.12208    
##  rule_law_2   0.00044289  0.00020138  2.1992  0.03179 *
##  Residual standard error: 0.8766 on 59 degrees of freedom
##  Multiple R-Squared: 0.4032,	Adjusted R-squared: 0.383 
##  Wald test:  22.1 on 2 and 59 DF,  p-value: 6.859e-08

## This looks as though it did not help. Now only the squared term for rule of law
## is significant, and the coefficient is really small, and the R squared value is
## only .383. So it doesn't perform as well as the linear model, which was very
## statistically significant. (But the R squared value is better than the linear
## model.)
TSLS_law <- ivreg(gdp_14 ~ rule_law | mort_est + lpd1500s, data=cd)
coeftest(TSLS_law, vcov = vcovHC(TSLS_law, "HC1"))
summary(TSLS_law)
##               Estimate Std. Error t value  Pr(>|t|)    
##  (Intercept) 7.3696620  0.3317268 22.2161 < 2.2e-16 ***
##  rule_law    0.0258790  0.0045733  5.6587 4.529e-07 ***
##  Residual standard error: 1.176 on 60 degrees of freedom
##  Multiple R-Squared: -0.09172,	Adjusted R-squared: -0.1099 
##  Wald test: 17.95 on 1 and 60 DF,  p-value: 7.922e-05

## Let's try pretending for a minute that the relationship between gdp and rule of
## law is linear, and the rest of the model is quadratic, and see what happens.
TSLS_law_quad.2 <- ivreg(gdp_14 ~ rule_law | mort_est + mort_est_2
                         + lpd1500s + lpd1500s_2, data=cd)
coeftest(TSLS_law_quad.2, vcov = vcovHC(TSLS_law_quad.2, "HC1"))
summary(TSLS_law_quad.2)
##               Estimate Std. Error t value  Pr(>|t|)    
##  (Intercept) 7.5116202  0.2604464 28.8413 < 2.2e-16 ***
##  rule_law    0.0235394  0.0027489  8.5632 5.397e-12 ***
##  Residual standard error: 1.122 on 60 degrees of freedom
##  Multiple R-Squared: 0.00587,	Adjusted R-squared: -0.0107 
##  Wald test: 24.62 on 1 and 60 DF,  p-value: 6.087e-06

## Our statistical significance for rule of law is very high now. Let's add in 
## the role of education and see what happens.

TSLS_law_ed_quad.2 <- ivreg(gdp_14 ~ rule_law + school_mean_14| mort_est + mort_est_2
                            + lpd1500s + lpd1500s_2 + protmiss + prienr1900, data=cd)
coeftest(TSLS_law_ed_quad.2, vcov = vcovHC(TSLS_law_ed_quad.2, "HC1"))
summary(TSLS_law_ed_quad.2)
##                  Estimate Std. Error t value  Pr(>|t|)    
##  (Intercept)    6.7934647  0.2482760 27.3625 < 2.2e-16 ***
##  rule_law       0.0074852  0.0040791  1.8350 0.0715447 .  
##  school_mean_14 0.2387617  0.0577711  4.1329 0.0001147 ***
##  Residual standard error: 0.6289 on 59 degrees of freedom
##  Multiple R-Squared: 0.6929,	Adjusted R-squared: 0.6824 
##  Wald test: 44.42 on 2 and 59 DF,  p-value: 1.704e-12

## This is problematic if we are going to uphold the institutions argument because
## rule of law is not statistically significant, and it's coefficient is small, 
## whereas education is significant, and it's coefficient is larger.

## Let's try adding in some other variables and see what happens.
TSLS_law_ed_quad.full <- ivreg(gdp_14 ~ dummy_dennis + lat_abst + africa + asia
                               + america + f_french + f_brit + rule_law
                               + school_mean_14| mort_est + mort_est_2 + lpd1500s
                               + lpd1500s_2 + protmiss + prienr1900 + dummy_dennis
                               + lat_abst + africa + asia + america + f_french
                               + f_brit, data=cd)
coeftest(TSLS_law_ed_quad.full, vcov = vcovHC(TSLS_law_ed_quad.full, "HC1"))
summary(TSLS_law_ed_quad.full)
##                   Estimate Std. Error t value  Pr(>|t|)    
##  (Intercept)     7.0197205  0.6242393 11.2452 1.548e-15 ***
##  dummy_dennis   -0.0811497  0.5014699 -0.1618   0.87207    
##  lat_abst        0.6949536  0.8051366  0.8632   0.39202    
##  africa         -0.3149139  0.4642994 -0.6783   0.50062    
##  asia            0.2621391  0.2972837  0.8818   0.38195    
##  america         0.1732330  0.2627392  0.6593   0.51259    
##  f_french        0.0845137  0.3795526  0.2227   0.82467    
##  f_brit         -0.0279962  0.2997398 -0.0934   0.92594    
##  rule_law        0.0055573  0.0050168  1.1077   0.27307    
##  school_mean_14  0.2091866  0.0976977  2.1412   0.03697 * 
##  Residual standard error: 0.6164 on 52 degrees of freedom
##  Multiple R-Squared: 0.7399,	Adjusted R-squared: 0.6949 
##  Wald test: 12.87 on 9 and 52 DF,  p-value: 1.76e-10

## This is problematic because we are still not finding significant results for the
## rule of law, but we are for education. Let's test these models on the 2005 data
## and see what happens.

TSLS_law_ed_quad.05 <- ivreg(logpgdp05 ~ ruleoflaw + tyr05_n | mort_est + mort_est_2
                             + lpd1500s + lpd1500s_2 + protmiss + prienr1900, data=cd)
coeftest(TSLS_law_ed_quad.05, vcov = vcovHC(TSLS_law_ed_quad.05, "HC1"))
summary(TSLS_law_ed_quad.05)
##              Estimate Std. Error t value  Pr(>|t|)    
##  (Intercept) 6.937368   0.454064 15.2784 < 2.2e-16 ***
##  ruleoflaw   0.468034   0.202941  2.3063  0.024627 *  
##  tyr05_n     0.243833   0.063797  3.8220  0.000321 ***
##  Residual standard error: 0.6517 on 59 degrees of freedom
##  Multiple R-Squared: 0.7209,	Adjusted R-squared: 0.7114 
##  Wald test: 53.49 on 2 and 59 DF,  p-value: 5.611e-14

## It appears that both are statistically significant in the simple model. Now we
## can try the more complex one.
TSLS_law_ed_quad.05f <- ivreg(logpgdp05 ~ dummy_dennis + lat_abst + africa + asia
                              + america + f_french + f_brit + ruleoflaw
                              + tyr05_n| mort_est + mort_est_2 + lpd1500s
                              + lpd1500s_2 + protmiss + prienr1900 + dummy_dennis
                              + lat_abst + africa + asia + america + f_french
                              + f_brit, data=cd)
coeftest(TSLS_law_ed_quad.05f, vcov = vcovHC(TSLS_law_ed_quad.05f, "HC1"))
summary(TSLS_law_ed_quad.05f)
##                 Estimate Std. Error t value  Pr(>|t|)    
##  (Intercept)   8.0490929  1.2172289  6.6126 2.052e-08 ***
##  dummy_dennis -0.5575508  0.6821419 -0.8174   0.41746    
##  lat_abst     -0.7470256  1.1695474 -0.6387   0.52580    
##  africa        0.0040968  0.4629041  0.0089   0.99297    
##  asia          0.1026060  0.3999735  0.2565   0.79855    
##  america       0.7370066  0.4479494  1.6453   0.10594    
##  f_french     -0.0040814  0.3481127 -0.0117   0.99069    
##  f_brit       -0.2737414  0.3454969 -0.7923   0.43178    
##  ruleoflaw     1.2018463  0.5125026  2.3451   0.02288 *  
##  tyr05_n       0.1017617  0.1555344  0.6543   0.51582
##  Residual standard error: 0.7729 on 52 degrees of freedom
##  Multiple R-Squared: 0.654,	Adjusted R-squared: 0.5941 
##  Wald test: 10.18 on 9 and 52 DF,  p-value: 7.755e-09

## So, now with the more complex model, we find that the significance of education
## has decreased somewhat and is no longer statistically significant, whereas the
## rule of law is still statistically significant.

## I wonder what happens in the new 2014 model if we control for corruption and
## years since independence.

TSLS_law_ed_corr.14Q <- ivreg(gdp_14 ~ rule_law + school_mean_14 + ctrl_corr
                              + yrs_ind | mort_est + mort_est_2 + lpd1500s
                              + lpd1500s_2 + protmiss + prienr1900 + ctrl_corr
                              + yrs_ind, data=cd)
coeftest(TSLS_law_ed_corr.14Q, vcov = vcovHC(TSLS_law_ed_corr.14Q, "HC1"))
summary(TSLS_law_ed_corr.14Q)
##                   Estimate Std. Error t value Pr(>|t|)    
##  (Intercept)     7.0549946  0.4765021 14.8058   <2e-16 ***
##  rule_law        0.0304685  0.0257101  1.1851   0.2409    
##  school_mean_14  0.1688689  0.1083877  1.5580   0.1248    
##  ctrl_corr      -0.0229495  0.0236732 -0.9694   0.3364    
##  yrs_ind         0.0017179  0.0019988  0.8594   0.3937 
##  Residual standard error: 0.9597 on 57 degrees of freedom
##  Multiple R-Squared: 0.309,	Adjusted R-squared: 0.2605 
##  Wald test: 10.54 on 4 and 57 DF,  p-value: 1.845e-06

## This didn't help--nothing is statistically significant anymore.

TSLS_confl_law_ed_.14Q <- ivreg(yrs_war ~ rule_law + school_mean_14| mort_est + mort_est_2
                                + lpd1500s + lpd1500s_2 + protmiss + prienr1900, data=cd)
coeftest(TSLS_confl_law_ed_.14Q, vcov = vcovHC(TSLS_confl_law_ed_.14Q, "HC1"))
summary(TSLS_confl_law_ed_.14Q)
## Not helpful.

## From before:
corr_mort_pden.reg <- ivreg(ctrl_corr ~ lcapped + lpd1500s, data=cd)
coeftest(corr_mort_pden.reg, vcov = vcovHC(corr_mort_pden.reg, "HC1"))
corr_mort_pden.gam <- gam(ctrl_corr ~ s(lcapped) + s(lpd1500s), data = cd)
summary(corr_mort_pden.gam)
##              Estimate Std. Error t value  Pr(>|t|)    
##  (Intercept) 125.3173    25.9168  4.8354 9.885e-06 ***
##  lcapped     -14.0717     5.5627 -2.5297   0.01411 *  
##  lpd1500s     -8.0369     3.0326 -2.6501   0.01031 * 
##                edf Ref.df     F p-value   
##  s(lcapped)  2.207  2.691 5.087 0.00416 **
##  s(lpd1500s) 1.506  1.876 3.740 0.03145 * 
plot(corr_mort_pden.gam)
## Interesting. It looks like the relationship is quadratic with both mortality and
## population density.

## Let's insert the new mortality data and see what happens.
corr_mort_pden_new.reg <- ivreg(ctrl_corr ~ mort_est + lpd1500s, data=cd)
coeftest(corr_mort_pden_new.reg, vcov = vcovHC(corr_mort_pden_new.reg, "HC1"))
corr_mort_pden_new.gam <- gam(ctrl_corr ~ s(mort_est) + s(lpd1500s), data = cd)
summary(corr_mort_pden_new.gam)
##                Estimate Std. Error t value  Pr(>|t|)    
##  (Intercept)  69.812709   9.219170  7.5726 2.897e-10 ***
##  mort_est     -0.046700   0.057165 -0.8169  0.417252    
##  lpd1500s    -10.862462   3.312181 -3.2795  0.001747 **
##                edf Ref.df      F  p-value    
##  s(mort_est) 4.671  5.593  3.254 0.008499 ** 
##  s(lpd1500s) 1.000  1.000 12.123 0.000961 ***
plot(corr_mort_pden_new.gam)
## Now we're just seeing a weird quadratic-ish relationship with mortality, and
## the relationship with population density is once again linear. Let's try inserting
## a quadratic term and see what happens.
corr_mort_pden_new.Qreg <- ivreg(ctrl_corr ~ mort_est + mort_est_2 + lpd1500s
                                 + lpd1500s_2, data=cd)
summary(corr_mort_pden_new.Qreg)
##  Estimate Std. Error t value Pr(>|t|)    
##  (Intercept)  79.885475  13.910565   5.743 3.78e-07 ***
##  mort_est     -0.484721   0.218113  -2.222   0.0302 *  
##  mort_est_2    0.001698   0.000729   2.329   0.0234 *  
##  lpd1500s    -10.668412   2.498066  -4.271 7.47e-05 ***
##  lpd1500s_2    2.006237   1.057105   1.898   0.0628 .  
##  Residual standard error: 31.2 on 57 degrees of freedom
##  Multiple R-Squared: 0.4022,	Adjusted R-squared: 0.3603 
##  Wald test: 9.588 on 4 and 57 DF,  p-value: 5.336e-06

## So, it seems that as long as we are including a squared term, these are
##  potentially also instruments for corruption.

## Let's try figuring out the relationship between corruption, years since
## independence, and gdp, checking for any nonlinearities.
gdp_corr_ind.reg <- ivreg(gdp_14 ~ ctrl_corr + yrs_ind, data=cd)
coeftest(gdp_corr_ind.reg, vcov = vcovHC(gdp_corr_ind.reg, "HC1"))
gdp_corr_ind.gam <- gam(gdp_14 ~ s(ctrl_corr) + s(yrs_ind), data = cd)
summary(gdp_corr_ind.gam)
plot(gdp_corr_ind.gam)
##               Estimate Std. Error t value  Pr(>|t|)    
##  (Intercept) 7.7226954  0.2830675 27.2822 < 2.2e-16 ***
##  ctrl_corr   0.0119837  0.0030961  3.8706 0.0002741 ***
##  yrs_ind     0.0049602  0.0018009  2.7544 0.0078066 ** 
##                 edf Ref.df      F  p-value    
##  s(ctrl_corr) 2.766  3.444 15.174 3.74e-08 ***
##  s(yrs_ind)   1.000  1.000  8.962  0.00403 **
## So, it seems that gdp is linearly related to years since independence, and
## has a somewhat quadratic relationship with control of corruption. Let's try 
## adding a squared term for corruption to see what we get.
ctrl_corr_2 <- I(cd$ctrl_corr^2)
gdp_corr_ind_Q14.reg <- ivreg(gdp_14 ~ ctrl_corr + ctrl_corr_2 + yrs_ind, data=cd)
summary(gdp_corr_ind_Q14.reg)
##                Estimate Std. Error t value Pr(>|t|)    
##  (Intercept)  9.255e+00  3.588e-01  25.795  < 2e-16 ***
##  ctrl_corr   -4.495e-02  1.050e-02  -4.280 7.09e-05 ***
##  ctrl_corr_2  3.702e-04  6.621e-05   5.592 6.35e-07 ***
##  yrs_ind      4.723e-03  1.535e-03   3.078  0.00318 ** 
##  Residual standard error: 0.7813 on 58 degrees of freedom
##  Multiple R-Squared: 0.534,	Adjusted R-squared: 0.5099 
##  Wald test: 22.15 on 3 and 58 DF,  p-value: 1.103e-09

## Everything is significant, and the relationship appears to be quadratic. Let's
## try controlling for corruption and its squared term, with independence, in our
## 2-stage model for gdp in 2014.

TSLS_law_ed_corr2.14Q <- ivreg(gdp_14 ~ rule_law + school_mean_14 + ctrl_corr
                               + ctrl_corr_2 + yrs_ind | mort_est + mort_est_2
                               + lpd1500s + lpd1500s_2 + protmiss + prienr1900
                               + ctrl_corr + ctrl_corr_2 + yrs_ind, data=cd)
coeftest(TSLS_law_ed_corr2.14Q, vcov = vcovHC(TSLS_law_ed_corr2.14Q, "HC1"))
summary(TSLS_law_ed_corr2.14Q)
##                   Estimate Std. Error t value Pr(>|t|)    
##  (Intercept)     7.5283068  0.6979658  10.786 2.74e-15 ***
##  rule_law        0.0181512  0.0240351   0.755   0.4533    
##  school_mean_14  0.1650142  0.0890782   1.852   0.0692 .  
##  ctrl_corr      -0.0280196  0.0141288  -1.983   0.0523 .  
##  ctrl_corr_2     0.0001117  0.0001382   0.808   0.4225    
##  yrs_ind         0.0021935  0.0017413   1.260   0.2130    
##  Residual standard error: 0.7639 on 56 degrees of freedom
##  Multiple R-Squared: 0.5699,	Adjusted R-squared: 0.5315 
##  Wald test: 15.56 on 5 and 56 DF,  p-value: 1.402e-09 
## This is interesting. I wonder if corruption might actually be more important
## than rule of law in 2014. Let's try replacing it.
TSLS_ed_corr2.14Q <- ivreg(gdp_14 ~ school_mean_14 + ctrl_corr + ctrl_corr_2
                           + yrs_ind | mort_est + mort_est_2
                           + lpd1500s + lpd1500s_2 + protmiss + prienr1900
                           + yrs_ind, data=cd)
coeftest(TSLS_ed_corr2.14Q, vcov = vcovHC(TSLS_ed_corr2.14Q, "HC1"))
summary(TSLS_ed_corr2.14Q)
##                    Estimate  Std. Error t value  Pr(>|t|)    
##  (Intercept)     8.92267581  1.42433863  6.2644 5.287e-08 ***
##  school_mean_14  0.16392533  0.09598071  1.7079   0.09310 .  
##  ctrl_corr      -0.05900999  0.03774839 -1.5632   0.12353    
##  ctrl_corr_2     0.00040642  0.00023844  1.7045   0.09373 .  
##  yrs_ind         0.00294166  0.00159392  1.8456   0.07015 
##  Residual standard error: 0.6612 on 57 degrees of freedom
##  Multiple R-Squared: 0.672,	Adjusted R-squared: 0.649 
##  Wald test: 22.22 on 4 and 57 DF,  p-value: 4.304e-11

## This is confusing. What if we just control for the number of years since
## independence? Let's find out.
TSLS_law_ed_ind.14Q <- ivreg(gdp_14 ~ rule_law + school_mean_14 + yrs_ind | mort_est
                             + mort_est_2 + lpd1500s + lpd1500s_2 + protmiss
                             + prienr1900 + yrs_ind,
                             data=cd)
coeftest(TSLS_law_ed_ind.14Q, vcov = vcovHC(TSLS_law_ed_ind.14Q, "HC1"))
summary(TSLS_law_ed_ind.14Q)
##  Estimate Std. Error t value  Pr(>|t|)    
##  (Intercept)    6.6940180  0.2329658 28.7339 < 2.2e-16 ***
##  rule_law       0.0075372  0.0041943  1.7970 0.0775436 .  
##  school_mean_14 0.2250882  0.0611134  3.6831 0.0005077 ***
##  yrs_ind        0.0018517  0.0013800  1.3418 0.1848757    
##  Residual standard error: 0.6307 on 58 degrees of freedom
##  Multiple R-Squared: 0.6964,	Adjusted R-squared: 0.6807 
##  Wald test: 31.54 on 3 and 58 DF,  p-value: 3.193e-12
##  This did not change our outcome to any substantial extent.

lGNI <- log(as.numeric(cd$gni_ppp_14))
TSLS_law_ed_ind.14Q <- ivreg(lGNI ~ rule_law + school_mean_14 | mort_est
                             + mort_est_2 + lpd1500s + lpd1500s_2 + protmiss
                             + prienr1900, data=cd)
coeftest(TSLS_law_ed_ind.14Q, vcov = vcovHC(TSLS_law_ed_ind.14Q, "HC1"))
summary(TSLS_law_ed_ind.14Q)
##                   Estimate Std. Error t value  Pr(>|t|)    
##  (Intercept)     2.2271280  0.3695419  6.0267 1.168e-07 ***
##  rule_law       -0.0017723  0.0041147 -0.4307   0.66824    
##  school_mean_14  0.1489431  0.0678065  2.1966   0.03199 *  
##  Residual standard error: 0.854 on 59 degrees of freedom
##  Multiple R-Squared: 0.1344,	Adjusted R-squared: 0.1051 
##  Wald test: 3.667 on 2 and 59 DF,  p-value: 0.03154

## Neither of these are a great predictor for GNI. R squared is only 0.1. I don't
## think we should try to use GNI as an outcome in our model.
gini.reg <- ivreg(gini_coeff ~ rule_law + school_mean_14 + gini_year, data=cd)
summary(gini.reg)
##                 Estimate Std. Error t value Pr(>|t|)    
##  (Intercept)    33.46070    3.69562   9.054 1.08e-12 ***
##  rule_law       -0.01162    0.02380  -0.488    0.627    
##  school_mean_14  0.40775    0.36809   1.108    0.273    
##  gini_year       0.67265    0.30228   2.225    0.030 *
gini.gam <- gam(gini_coeff ~ s(rule_law) + s(school_mean_14) + s(gini_year), data=cd)
summary(gini.gam)
##                      edf Ref.df     F p-value   
##  s(rule_law)       2.464  3.068 1.111 0.34263   
##  s(school_mean_14) 1.000  1.000 0.648 0.42432   
##  s(gini_year)      3.195  3.944 3.841 0.00952 **'

gini_corr.reg <- ivreg(gini_coeff ~ ctrl_corr + gini_year, data=cd)
summary(gini_corr.reg)
##              Estimate Std. Error t value Pr(>|t|)    
##  (Intercept)  36.3572     3.2794  11.087 4.68e-16 ***
##  ctrl_corr    -0.0209     0.0233  -0.897   0.3733    
##  gini_year     0.7230     0.3001   2.409   0.0191 *
gini_corr.gam <- gam(gini_coeff ~ s(ctrl_corr) + s(gini_year), data=cd)
summary(gini_corr.gam)
##                 edf Ref.df     F p-value   
##  s(ctrl_corr) 1.000  1.000 0.799 0.37525   
##  s(gini_year) 3.236  3.995 4.711 0.00241 **

## Now let's see if we can figure out what is causing the difference in gdp between
## 2005 and 2014. We can plot the difference on the 2005 gdp data.
gdp_diff <- cd$gdp_14 - cd$logpgdp05
gdp_diff.plot <- ggplot(aes(x = logpgdp05, y = gdp_diff, label=code), data=cd) +
  geom_point(shape = 18) +
  geom_text(aes(label=code)) +
  geom_smooth(fill = "mediumorchid4", colour="yellow2", method = "loess", fullrange=T, size=0.5) +
  xlab("Log GDP in 2005 ") + 
  ylab("Difference in logged GDP, 2005-2014 ") +
  ggtitle("Figure 1: GDP Growth Between 2005 and 2014") +
  theme(panel.background = element_rect(fill = 'lightsteelblue', colour = 'white'))
gdp_diff.plot
## It seems like DR Congo, Sierra Leone, Ghana, Nigeria, Indonesia, Sri Lanka, and
## Bangladesh all experienced a large growth in income compared to their incomes in
## 2005. I wonder what explains this. Sierra Leone and Sri Lanka experienced civil
## war that stopped recently. Let's regress gdp_diff on other variables to get a
## sense of what's happening.

gdp_diff.reg1 <- ivreg(gdp_diff ~ voice_acc + polit_stab + govt_eff + reg_qual
                       + rule_law + ctrl_corr + yrs_ind + yrs_war + polity_score
                       + evi, data=cd)
summary(gdp_diff.reg1)
##  These mattered (others didn't):
##  yrs_ind      -0.0010730  0.0005765  -1.861   0.0687 .
##  yrs_war       0.0104887  0.0043942   2.387   0.0209 *
##  Multiple R-Squared: 0.3012,	Adjusted R-squared: 0.1586 
##  Wald test: 2.112 on 10 and 49 DF,  p-value: 0.04122 

gdp_diff.reg2 <- ivreg(gdp_diff ~ yrs_conflict + avg_intensity + gini_coeff
                       + gini_year + life_exp_14 + school_mean_14, data=cd)
summary(gdp_diff.reg2)

## Talking with Pamela on education vs rule of law in outcomes
cd$gni_ppp_14 <- as.numeric(cd$gni_ppp_14)
gni_ed_law <- ivreg(gni_ppp_14 ~ school_mean_14 + rule_law, data=cd)
summary(gni_ed_law)
coeftest(gni_ed_law, vcov = vcovHC(gni_ed_law, "HC1"))

law_s1_14 <- ivreg(rule_law ~ mort_est + mort_est_2 + lpd1500s + lpd1500s_2
                   + prienr1900 + protmiss, data=cd)
coeftest(law_s1_14, vcov = vcovHC(law_s1_14, "HC1"))
##                Estimate  Std. Error t value  Pr(>|t|)    
##  (Intercept)  9.6210e+01  1.8258e+01  5.2696 2.353e-06 ***
##  mort_est    -6.8370e-01  2.4806e-01 -2.7562  0.007918 ** 
##  mort_est_2   2.2659e-03  7.7595e-04  2.9201  0.005064 ** 
##  lpd1500s    -1.1292e+01  4.7501e+00 -2.3772  0.020948 *  
##  lpd1500s_2   1.2193e+00  1.3683e+00  0.8911  0.376747    
##  prienr1900   2.3169e-01  3.4465e-01  0.6723  0.504236    
##  protmiss    -7.1232e+00  8.6205e+00 -0.8263  0.412196 
summary(law_s1_14)
##  Residual standard error: 34.49 on 55 degrees of freedom
##  Multiple R-Squared: 0.4688,	Adjusted R-squared: 0.4109 
##  Wald test: 8.091 on 6 and 55 DF,  p-value: 2.779e-06

law_s1_05 <- ivreg(ruleoflaw ~ mort_est + mort_est_2 + lpd1500s + lpd1500s_2
                   + prienr1900 + protmiss, data=cd)
coeftest(law_s1_05, vcov = vcovHC(law_s1_05, "HC1"))
##                Estimate Std. Error t value Pr(>|t|)   
##  (Intercept)  1.0260e-01  3.8386e-01  0.2673 0.790251   
##  mort_est    -1.3010e-02  4.0930e-03 -3.1787 0.002429 **
##  mort_est_2   4.1956e-05  1.2376e-05  3.3901 0.001299 **
##  lpd1500s    -1.3069e-01  7.7477e-02 -1.6868 0.097313 . 
##  lpd1500s_2   5.5935e-02  2.3695e-02  2.3606 0.021816 * 
##  prienr1900   1.1200e-02  6.7262e-03  1.6651 0.101582   
##  protmiss    -1.2312e-01  1.6105e-01 -0.7645 0.447829   
summary(law_s1_05)
##  Residual standard error: 0.6026 on 55 degrees of freedom
##  Multiple R-Squared: 0.5914,	Adjusted R-squared: 0.5468 
##  Wald test: 13.27 on 6 and 55 DF,  p-value: 3.159e-09

ed_s1_14 <- ivreg(school_mean_14 ~ mort_est + mort_est_2 + lpd1500s + lpd1500s_2
                  + prienr1900 + protmiss, data=cd)
coeftest(ed_s1_14, vcov = vcovHC(ed_s1_14, "HC1"))
##                 Estimate  Std. Error t value  Pr(>|t|)    
##  (Intercept)  7.6061e+00  1.0650e+00  7.1415 2.212e-09 ***
##  mort_est    -3.2680e-02  1.3739e-02 -2.3785   0.02088 *  
##  mort_est_2   9.8023e-05  4.5074e-05  2.1747   0.03397 *  
##  lpd1500s    -3.7112e-01  1.4062e-01 -2.6391   0.01079 *  
##  lpd1500s_2   4.8004e-02  4.7376e-02  1.0133   0.31538    
##  prienr1900   5.5461e-02  9.8169e-03  5.6496 5.882e-07 ***
##  protmiss     9.5537e-01  3.8310e-01  2.4938   0.01568 * 
summary(ed_s1_14)
##  Residual standard error: 1.844 on 55 degrees of freedom
##  Multiple R-Squared: 0.6331,	Adjusted R-squared: 0.593 
##  Wald test: 15.82 on 6 and 55 DF,  p-value: 1.863e-10

ed_s1_05 <- ivreg(tyr05_n ~ mort_est + mort_est_2 + lpd1500s + lpd1500s_2
                  + prienr1900 + protmiss, data=cd)
coeftest(ed_s1_05, vcov = vcovHC(ed_s1_05, "HC1"))
##                 Estimate  Std. Error t value  Pr(>|t|)    
##  (Intercept)  6.2863e+00  1.0521e+00  5.9749 1.766e-07 ***
##  mort_est    -2.5871e-02  1.3727e-02 -1.8846   0.06477 .  
##  mort_est_2   7.5316e-05  4.4946e-05  1.6757   0.09947 .  
##  lpd1500s    -2.5938e-01  1.3726e-01 -1.8897   0.06407 .  
##  lpd1500s_2   2.7905e-02  4.7130e-02  0.5921   0.55622    
##  prienr1900   6.9842e-02  8.6289e-03  8.0940 6.131e-11 ***
##  protmiss     6.0718e-01  3.5149e-01  1.7274   0.08970 . 
summary(ed_s1_05)
##  Residual standard error: 1.802 on 55 degrees of freedom
##  Multiple R-Squared: 0.6466,	Adjusted R-squared: 0.608 
##  Wald test: 16.77 on 6 and 55 DF,  p-value: 6.922e-11 

ed_s4_05 <- ivreg(tyr05_n ~ mort_est + mort_est_2 + lpd1500s + lpd1500s_2
                  + prienr1900 + protmiss + dummy_dennis + lat_abst
                  + america + asia + africa + f_french + f_brit, data=cd)
coeftest(ed_s4_05, vcov = vcovHC(ed_s4_05, "HC1"))
##                  Estimate  Std. Error t value Pr(>|t|)   
##  (Intercept)   4.9226e+00  1.6338e+00  3.0130 0.004121 **
##  mort_est     -5.9767e-04  1.7316e-02 -0.0345 0.972610   
##  mort_est_2    5.5261e-06  5.6305e-05  0.0981 0.922225   
##  lpd1500s     -5.6722e-03  1.4315e-01 -0.0396 0.968556   
##  lpd1500s_2    3.6929e-02  5.8958e-02  0.6264 0.534045   
##  prienr1900    4.8176e-02  1.8865e-02  2.5537 0.013888 * 
##  protmiss      1.3318e+00  5.5821e-01  2.3858 0.021035 * 
summary(ed_s4_05)
##  Residual standard error: 1.66 on 48 degrees of freedom
##  Multiple R-Squared: 0.7384,	Adjusted R-squared: 0.6676 
##  Wald test: 10.42 on 13 and 48 DF,  p-value: 5.816e-10

law_s4_05 <- ivreg(ruleoflaw ~ mort_est + mort_est_2 + lpd1500s + lpd1500s_2
                   + prienr1900 + protmiss + dummy_dennis + lat_abst
                   + america + asia + africa + f_french + f_brit, data=cd)
coeftest(law_s4_05, vcov = vcovHC(law_s4_05, "HC1"))
##                  Estimate  Std. Error t value Pr(>|t|)  
##  (Intercept)  -1.4726e-01  5.8759e-01 -0.2506  0.80317  
##  mort_est     -1.9616e-03  4.8850e-03 -0.4016  0.68979  
##  mort_est_2    3.1127e-06  1.5842e-05  0.1965  0.84506  
##  lpd1500s     -1.3530e-01  7.4855e-02 -1.8075  0.07695 .
##  lpd1500s_2    1.8893e-02  2.4452e-02  0.7727  0.44350  
##  prienr1900    2.8589e-03  6.8590e-03  0.4168  0.67868  
##  protmiss      9.4638e-02  1.8848e-01  0.5021  0.61789
summary(law_s4_05)
##  Residual standard error: 0.5769 on 48 degrees of freedom
##  Multiple R-Squared: 0.6731,	Adjusted R-squared: 0.5846 
##  Wald test: 7.602 on 13 and 48 DF,  p-value: 7.583e-08 

law_s4_14 <- ivreg(rule_law ~ mort_est + mort_est_2 + lpd1500s + lpd1500s_2
                   + prienr1900 + protmiss + dummy_dennis + lat_abst
                   + america + asia + africa + f_french + f_brit, data=cd)
coeftest(law_s4_14, vcov = vcovHC(law_s4_14, "HC1"))
##  Estimate  Std. Error t value  Pr(>|t|)    
##  (Intercept)   1.2228e+02  2.8523e+01  4.2869 8.687e-05 ***
##  mort_est     -3.0635e-01  2.7621e-01 -1.1091   0.27292    
##  mort_est_2    8.2102e-04  8.7960e-04  0.9334   0.35528    
##  lpd1500s     -1.1842e+01  4.5187e+00 -2.6207   0.01172 *  
##  lpd1500s_2   -1.4213e+00  1.5076e+00 -0.9428   0.35052    
##  prienr1900   -8.3181e-01  3.7988e-01 -2.1897   0.03344 *  
##  protmiss     -3.7373e-01  1.1025e+01 -0.0339   0.97310    
summary(law_s4_14)
##  Residual standard error: 31.04 on 48 degrees of freedom
##  Multiple R-Squared: 0.6245,	Adjusted R-squared: 0.5228 
##  Wald test: 6.141 on 13 and 48 DF,  p-value: 1.434e-06 

ed_s4_14 <- ivreg(school_mean_14 ~ mort_est + mort_est_2 + lpd1500s + lpd1500s_2
                  + prienr1900 + protmiss + dummy_dennis + lat_abst
                  + america + asia + africa + f_french + f_brit, data=cd)
coeftest(ed_s4_14, vcov = vcovHC(ed_s4_14, "HC1"))
##                  Estimate  Std. Error t value  Pr(>|t|)    
##  (Intercept)   6.2669e+00  1.5393e+00  4.0713 0.0001738 ***
##  mort_est     -1.4850e-03  1.6257e-02 -0.0913 0.9275996    
##  mort_est_2    9.3451e-06  5.2385e-05  0.1784 0.8591653    
##  lpd1500s     -1.1393e-01  1.4067e-01 -0.8100 0.4219622    
##  lpd1500s_2    4.5585e-02  6.3062e-02  0.7229 0.4732720    
##  prienr1900    3.9816e-02  1.7928e-02  2.2209 0.0311111 *  
##  protmiss      1.7987e+00  5.1920e-01  3.4643 0.0011291 **
summary(ed_s4_14)
##  Residual standard error: 1.665 on 48 degrees of freedom
##  Multiple R-Squared: 0.7392,	Adjusted R-squared: 0.6685 
##  Wald test: 10.46 on 13 and 48 DF,  p-value: 5.447e-10

ind_table <- ivreg(gdp_14 ~ yrs_ind, data=cd)
coeftest(ind_table, vcov = vcovHC(ind_table, "HC1"))
summary(ind_table)
code <- as.character(cd$code)
gdp_14.plot <- ggplot(aes(x = yrs_ind, y = gdp_14, label=code), data=cd) +
  geom_point(shape = 18) +
  geom_text(aes(label=code)) +
  geom_smooth(fill = "mediumorchid4", colour="yellow2", method = "lm", fullrange=T, size=0.5) +
  xlab("Number of Years Since Independence ") + 
  ylab("Log GDP per capita, PPP, 2014 ") +
  ggtitle("Figure 2: GDP Increases with Number of Years Since Independence") +
  theme(panel.background = element_rect(fill = 'lightsteelblue', colour = 'white'))
gdp_14.plot

ed_gini <- ivreg(gini_coeff ~ gini_year + school_mean_14, data=cd)
summary(ed_gini)
## Nothing significant.

ed_life <- ivreg(life_exp_14 ~ school_mean_14, data=cd)
summary(ed_life)
##                Estimate Std. Error t value Pr(>|t|)    
##  (Intercept)     54.0271     2.0998  25.729  < 2e-16 ***
##  school_mean_14   2.1676     0.2746   7.893 7.47e-11 ***

ed_polity <- ivreg(polity_score ~ school_mean_14, data=cd)
summary(ed_polity)
##                 Estimate Std. Error t value Pr(>|t|)    
##  (Intercept)     -1.0687     1.5054  -0.710 0.480622    
##  school_mean_14   0.8281     0.2002   4.137 0.000115 ***

ed_climate <- ivreg(evi ~ school_mean_14 + ave_m_elev + coast_area_ratio_m_km2,
                    data=cd)
summary(ed_climate)
coeftest(ed_climate, vcov = vcovHC(ed_climate, "HC1"))

ed_life <- ivreg(life_exp_14 ~ school_mean_14 | prienr1900 + protmiss, data=cd)
coeftest(ed_life, vcov = vcovHC(ed_life, "HC1"))
##                 Estimate Std. Error t value  Pr(>|t|)    
##  (Intercept)    55.61578    2.33593 23.8088 < 2.2e-16 ***
##  school_mean_14  1.94343    0.29463  6.5961 1.217e-08 ***
ed_life.plot <- ggplot(aes(x = school_mean_14, y = life_exp_14, label=code), data=cd) +
  geom_point(shape = 18) +
  geom_text(aes(label=code)) +
  geom_smooth(fill = "mediumorchid4", colour="yellow2", method = "lm", fullrange=T, size=0.5) +
  xlab("Mean Years of Schooling of Adult Population, 2014") + 
  ylab("National Life Expectancy, 2014") +
  ggtitle("Figure 3: National Life Expectancy Increases with Education") +
  theme(panel.background = element_rect(fill = 'lightsteelblue', colour = 'white'))
ed_life.plot


### ----------- Final Regressions------------ ###

## Load the data
setwd(dirname(file.choose()))
cd <- read.csv("country_data_combo.csv")
View(cd)
library(AER)

## Authors' Table 6: First Stage Regression: Education
## (Note that we have updated everything to current values, so our results will be
## slightly different.)

## Stata: reg tyr05_n prienr1900 protmiss lcapped lpd1500 dummy_den lat_abs america
##        asia africa f_fr f_br
T6_M4_S1_our <- ivreg(school_mean_14 ~ prienr1900 + protmiss + lcapped + lpd1500s
                      + dummy_dennis + lat_abst + america + asia + africa + f_french
                      + f_brit, data=cd)
coeftest(T6_M4_S1_our, vcov = vcovHC(T6_M4_S1_our, "HC1"))
##                Estimate Std. Error t value  Pr(>|t|)    
##  (Intercept)   7.678582   2.016588  3.8077 0.0003843 ***
##  prienr1900    0.037041   0.016852  2.1980 0.0326103 *  
##  protmiss      1.702746   0.381997  4.4575 4.693e-05 ***
##  lcapped      -0.367661   0.504040 -0.7294 0.4691428    
##  lpd1500s     -0.064041   0.167154 -0.3831 0.7032511
## This is basically what we expect.
summary(T6_M4_S1_our) ## For the R value
##  Residual standard error: 1.625 on 50 degrees of freedom
##  Multiple R-Squared: 0.7411,	Adjusted R-squared: 0.6842 
##  Wald test: 13.01 on 11 and 50 DF,  p-value: 3.481e-11

## Stata: reg ruleof  prienr1900 protmiss lcapped lpd1500 dummy_den lat_abs america
##        asia africa f_fr f_br
## Note that we use a slightly different indicator for rule of law.
T6_M8_S1_our <- ivreg(rule_law ~ prienr1900 + protmiss + lcapped + lpd1500s +
                        dummy_dennis + lat_abst + america + asia + africa + f_french
                      + f_brit, data=cd)
coeftest(T6_M8_S1_our, vcov = vcovHC(T6_M8_S1_our, "HC1"))
##                 Estimate Std. Error t value Pr(>|t|)  
##  (Intercept)   0.6238427  0.5473555  1.1397  0.25983  
##  prienr1900    0.0048762  0.0061986  0.7867  0.43520  
##  protmiss      0.0854168  0.1623590  0.5261  0.60115  
##  lcapped      -0.2253142  0.1012044 -2.2263  0.03053 *
##  lpd1500s     -0.1111253  0.0634890 -1.7503  0.08620 .
summary(T6_M8_S1_our) ## For the R value.
## Residual standard error: 0.5578 on 50 degrees of freedom
##  Multiple R-Squared: 0.6838,	Adjusted R-squared: 0.6142 
##  Wald test: 9.829 on 11 and 50 DF,  p-value: 3.683e-09 

##  Stata: reg ruleof  prienr1900 protmiss lcapped lpd1500 dummy_den lat_abs
##  america asia africa f_fr f_br
T6_M8_S1_authors <- ivreg(ruleoflaw ~ prienr1900 + protmiss + lcapped + lpd1500s +
                            dummy_dennis + lat_abst + america + asia + africa + f_french
                          + f_brit, data=cd)
coeftest(T6_M8_S1_authors, vcov = vcovHC(T6_M8_S1_authors, "HC1"))
##                 Estimate Std. Error t value Pr(>|t|)  
##  (Intercept)   0.5166281  0.5602080  0.9222  0.36085  
##  prienr1900    0.0038951  0.0059865  0.6507  0.51825  
##  protmiss      0.0672502  0.1678603  0.4006  0.69040  
##  lcapped      -0.2021674  0.1086748 -1.8603  0.06873 .
##  lpd1500s     -0.0952970  0.0663803 -1.4356  0.15734
summary(T6_M8_S1_authors)
## Residual standard error: 0.561 on 50 degrees of freedom
##  Multiple R-Squared: 0.678,	Adjusted R-squared: 0.6071 
##  Wald test:  9.57 on 11 and 50 DF,  p-value: 5.593e-09 

T6_M4_S1_authors <- ivreg(tyr05_n ~ prienr1900 + protmiss + lcapped + lpd1500s +
                            dummy_dennis + lat_abst + america + asia + africa + f_french
                          + f_brit, data=cd)
coeftest(T6_M4_S1_authors, vcov = vcovHC(T6_M4_S1_authors, "HC1"))
##                Estimate Std. Error t value Pr(>|t|)   
##  (Intercept)   7.076564   2.084549  3.3948 0.001353 **
##  prienr1900    0.046526   0.017632  2.6386 0.011068 * 
##  protmiss      1.186795   0.428822  2.7676 0.007898 **
##  lcapped      -0.539751   0.508059 -1.0624 0.293168   
##  lpd1500s      0.053292   0.164346  0.3243 0.747087
summary(T6_M4_S1_authors)
##  Residual standard error: 1.601 on 50 degrees of freedom
##  Multiple R-Squared: 0.7465,	Adjusted R-squared: 0.6908 
##  Wald test: 13.39 on 11 and 50 DF,  p-value: 2.123e-11

T6_Me_S1_authors <- ivreg(ruleoflaw ~ prienr1900 + protmiss + lcapped + lpd1500s
                          , data=cd)
coeftest(T6_Me_S1_authors, vcov = vcovHC(T6_Me_S1_authors, "HC1"))
##                Estimate Std. Error t value  Pr(>|t|)    
##  (Intercept)  1.6939295  0.6264610  2.7040 0.0090161 ** 
##  prienr1900   0.0102348  0.0053288  1.9207 0.0597819 .  
##  protmiss    -0.1335784  0.1580968 -0.8449 0.4016907    
##  lcapped     -0.4737595  0.1206847 -3.9256 0.0002356 ***
##  lpd1500s    -0.0447401  0.0627597 -0.7129 0.4788291    
summary(T6_Me_S1_authors)
##  Residual standard error: 0.6183 on 57 degrees of freedom
##  Multiple R-Squared: 0.5541,	Adjusted R-squared: 0.5228 
##  Wald test: 17.71 on 4 and 57 DF,  p-value: 1.688e-09

T6_Me2_S1_authors <- ivreg(tyr05_n ~ prienr1900 + protmiss + lcapped + lpd1500s
                          , data=cd)
coeftest(T6_Me2_S1_authors, vcov = vcovHC(T6_Me2_S1_authors, "HC1"))
##               Estimate Std. Error t value  Pr(>|t|)    
##  (Intercept)  9.340683   1.615791  5.7809 3.278e-07 ***
##  prienr1900   0.062917   0.010396  6.0521 1.182e-07 ***
##  protmiss     0.734315   0.389479  1.8854  0.064479 .  
##  lcapped     -1.005659   0.323805 -3.1058  0.002956 ** 
##  lpd1500s    -0.139837   0.133299 -1.0491  0.298583
summary(T6_Me2_S1_authors)
##  Residual standard error: 1.694 on 57 degrees of freedom
##  Multiple R-Squared: 0.6763,	Adjusted R-squared: 0.6535 
##  Wald test: 29.77 on 4 and 57 DF,  p-value: 2.227e-13 

## This makes little sense to use because we are not finding statistically significant
## results. But we are also not successfully replicating the authors' results, and
## we are not sure why.

## Let's try an earlier model.

## We may actually want to try reporting on models 3 and 7, without the colonial
## origin dummies, since we are more likely to get statistically significant results
## that way.
T6_M3_S1_our <- ivreg(school_mean_14 ~ prienr1900 + protmiss + lcapped + lpd1500s
                      + dummy_dennis + lat_abst + america + asia + africa, data=cd)
coeftest(T6_M3_S1_our, vcov = vcovHC(T6_M3_S1_our, "HC1"))
##                Estimate Std. Error t value  Pr(>|t|)    
##  (Intercept)   7.765766   1.817883  4.2719 8.269e-05 ***
##  prienr1900    0.032102   0.014284  2.2474 0.0288827 *  
##  protmiss      1.566070   0.371702  4.2132 0.0001004 ***
##  lcapped      -0.362352   0.430000 -0.8427 0.4032673    
##  lpd1500s     -0.112570   0.138064 -0.8153 0.4185925
summary(T6_M3_S1_our) ## For the R value.
##  Residual standard error: 1.604 on 52 degrees of freedom
##  Multiple R-Squared: 0.7377,	Adjusted R-squared: 0.6923 
##  Wald test: 16.25 on 9 and 52 DF,  p-value: 2.876e-12

T6_M7_S1_our <- ivreg(rule_law ~ prienr1900 + protmiss + lcapped + lpd1500s +
                        dummy_dennis + lat_abst + america + asia + africa, data=cd)
coeftest(T6_M7_S1_our, vcov = vcovHC(T6_M7_S1_our, "HC1"))
##                 Estimate Std. Error t value Pr(>|t|)  
##  (Intercept)   0.5872602  0.5295551  1.1090  0.27255  
##  prienr1900    0.0051904  0.0053419  0.9716  0.33573  
##  protmiss      0.0991972  0.1431055  0.6932  0.49128  
##  lcapped      -0.2146339  0.0888720 -2.4151  0.01928 *
##  lpd1500s     -0.1078589  0.0554228 -1.9461  0.05705 .
summary(T6_M7_S1_our) ## For the R value.
##  Residual standard error: 0.5484 on 52 degrees of freedom
##  Multiple R-Squared: 0.6821,	Adjusted R-squared: 0.6271 
##  Wald test:  12.4 on 9 and 52 DF,  p-value: 3.291e-10

T6_M1_S1_our <- ivreg(rule_law ~ prienr1900 + protmiss + lcapped + lpd1500s +
                        dummy_dennis, data=cd)
coeftest(T6_M1_S1_our, vcov = vcovHC(T6_M1_S1_our, "HC1"))
##                Estimate Std. Error t value  Pr(>|t|)    
##  (Intercept)  106.62375   31.42822  3.3926 0.0012766 ** 
##  prienr1900    -0.49498    0.28169 -1.7572 0.0843507 .  
##  protmiss       8.02786    8.27619  0.9700 0.3362197    
##  lcapped      -10.32103    5.69780 -1.8114 0.0754421 .  
##  lpd1500s      -8.00716    3.04921 -2.6260 0.0111211 *  
##  dummy_dennis  91.63817   25.27360  3.6258 0.0006229 ***

T6_M1_S1_our_0 <- ivreg(rule_law ~ prienr1900 + protmiss + lcapped + lpd1500s, data=cd)
coeftest(T6_M1_S1_our_0, vcov = vcovHC(T6_M1_S1_our_0, "HC1"))
##               Estimate Std. Error t value  Pr(>|t|)    
##  (Intercept) 153.85531   30.92319  4.9754 6.335e-06 ***
##  prienr1900    0.10575    0.27657  0.3824  0.703599    
##  protmiss     -5.88052    8.44209 -0.6966  0.488902    
##  lcapped     -19.62598    5.98063 -3.2816  0.001765 ** 
##  lpd1500s     -9.18951    4.11447 -2.2335  0.029460 * 
summary(T6_M1_S1_our_0)
##  Residual standard error: 34.43 on 57 degrees of freedom
##  Multiple R-Squared: 0.4515,	Adjusted R-squared: 0.413 
##  Wald test: 11.73 on 4 and 57 DF,  p-value: 5.112e-07

T6_M4_S1_our_0 <- ivreg(school_mean_14 ~ prienr1900 + protmiss + lcapped + lpd1500s, data=cd)
coeftest(T6_M4_S1_our_0, vcov = vcovHC(T6_M4_S1_our_0, "HC1"))
##  (Intercept) 10.898067   1.695856  6.4263 2.856e-08 ***
##  prienr1900   0.049738   0.011493  4.3279 6.146e-05 ***
##  protmiss     1.086963   0.413974  2.6257   0.01108 *  
##  lcapped     -1.128077   0.334635 -3.3711   0.00135 ** 
##  lpd1500s    -0.224996   0.144086 -1.5615   0.12393 
summary(T6_M4_S1_our_0)
##  Residual standard error: 1.76 on 57 degrees of freedom
##  Multiple R-Squared: 0.6536,	Adjusted R-squared: 0.6293 
##  Wald test: 26.89 on 4 and 57 DF,  p-value: 1.477e-12

## Note that applying the standard error correction is likely to decrease our error
## terms slightly, so we may find more statistically significant results with the
## corrected errors. For now, let's stick with models 4 and 8, and see what happens
## later.

## Time for stage 2 regressions: T6_M4_S2

T6_M4_S2_our <- ivreg(gdp_14 ~ dummy_dennis + lat_abst + africa + asia + america + f_french + f_brit
                      + school_mean_14 + rule_law | prienr1900 + protmiss + lcapped
                      + lpd1500s + dummy_dennis + lat_abst + africa + asia + america + f_french + f_brit,
                      data=cd)
coeftest(T6_M4_S2_our, vcov = vcovHC(T6_M4_S2_our, "HC1"))
##                   Estimate Std. Error t value  Pr(>|t|)    
##  (Intercept)     6.8389157  0.5817457 11.7559 2.916e-16 ***
##  dummy_dennis   -0.3132572  0.4667827 -0.6711   0.50513    
##  lat_abst        0.6685381  0.8249559  0.8104   0.42141    
##  africa         -0.2975161  0.4675761 -0.6363   0.52738    
##  asia            0.2619736  0.3002963  0.8724   0.38701    
##  america         0.1724587  0.2735440  0.6305   0.53115    
##  f_french        0.1354418  0.3847577  0.3520   0.72625    
##  f_brit         -0.0275560  0.3076237 -0.0896   0.92897    
##  school_mean_14  0.2174896  0.0956377  2.2741   0.02712 *  
##  rule_law        0.0076953  0.0052149  1.4756   0.14607  
## Wow, this is not good. We're getting the school is significant and rule of law
## is not significant. The coefficient on school is also larger.

T6_M4_S2_our_nc <- ivreg(gdp_14 ~ school_mean_14 + rule_law | prienr1900 + protmiss
                         + lcapped + lpd1500s, data=cd)
coeftest(T6_M4_S2_our_nc, vcov = vcovHC(T6_M4_S2_our_nc, "HC1"))
##                  Estimate Std. Error t value Pr(>|t|)    
##  (Intercept)    6.8113134  0.3178208 21.4313  < 2e-16 ***
##  school_mean_14 0.1871837  0.0883942  2.1176  0.03843 *  
##  rule_law       0.0132159  0.0069005  1.9152  0.06032 .

## Stata: ivreg2 logpgdp05 (tyr05_n ruleof= prienr1900 protmiss lcapped lpd1500 )
##        dummy_den  lat_abs africa america asia  f_br f_fr
T6_M4_S2_authors <- ivreg(logpgdp05 ~ dummy_dennis + lat_abst + africa + asia
                          + america + f_french + f_brit + tyr05_n + ruleoflaw |
                            prienr1900 + protmiss + lcapped + lpd1500s + dummy_dennis
                          + lat_abst + africa + asia + america + f_french + f_brit,
                          data=cd)
coeftest(T6_M4_S2_authors, vcov = vcovHC(T6_M4_S2_authors, "HC1"))
##                 Estimate Std. Error t value  Pr(>|t|)    
##  (Intercept)   7.8127099  1.2263154  6.3709 4.975e-08 ***
##  dummy_dennis -0.7057404  0.6567863 -1.0745   0.28754    
##  lat_abst     -0.7359156  1.1137689 -0.6607   0.51169    
##  africa        0.0980305  0.4554690  0.2152   0.83043    
##  asia          0.1371569  0.4238795  0.3236   0.74756    
##  america       0.6986801  0.4902935  1.4250   0.16012    
##  f_french     -0.0055691  0.3374726 -0.0165   0.98690    
##  f_brit       -0.3200710  0.3411809 -0.9381   0.35252    
##  tyr05_n       0.1400092  0.1617200  0.8658   0.39060    
##  ruleoflaw     1.1984614  0.5101875  2.3491   0.02266 *
summary(T6_M4_S2_authors)
##  Residual standard error: 0.7689 on 52 degrees of freedom
##  Multiple R-Squared: 0.6575,	Adjusted R-squared: 0.5983 
##  Wald test: 10.67 on 9 and 52 DF,  p-value: 3.711e-09 

T6_M4_S2_authors_nc <- ivreg(logpgdp05 ~ tyr05_n + ruleoflaw | prienr1900 + protmiss
                          + lcapped + lpd1500s, data=cd)
coeftest(T6_M4_S2_authors_nc, vcov = vcovHC(T6_M4_S2_authors_nc, "HC1"))
##              Estimate Std. Error t value Pr(>|t|)    
##  (Intercept) 7.559835   0.645347 11.7144  < 2e-16 ***
##  tyr05_n     0.161415   0.089068  1.8123  0.07503 .  
##  ruleoflaw   0.814286   0.328112  2.4817  0.01594 *

## Maybe we can conclude that the effects on the rule of law fade as time passes.
## It's been another ten years. That is a significant amount of time for the nations
## that gained independence as recently as the '50s and '60s.

rule_law_our_model <- ivreg(rule_law ~ ctrl_corr + yrs_ind + prienr1900
                            + protmiss + lcapped + lpd1500s + dummy_dennis + lat_abst
                            + america + asia + africa + f_french + f_brit, data=cd)
coeftest(rule_law_our_model, vcov = vcovHC(rule_law_our_model, "HC1"))
summary(rule_law_our_model)
##                 Estimate Std. Error t value  Pr(>|t|)    
##  (Intercept)   29.898279  26.337684  1.1352   0.26193    
##  ctrl_corr      0.732481   0.151367  4.8391 1.391e-05 ***
##  yrs_ind        0.178935   0.077357  2.3131   0.02505 *  
##  prienr1900    -0.414113   0.230691 -1.7951   0.07894 .  
##  protmiss      10.207837   8.279898  1.2328   0.22364    
##  lcapped       -5.745712   5.383473 -1.0673   0.29118    
##  lpd1500s      -4.167580   3.056971 -1.3633   0.17915 
##  Residual standard error: 21.69 on 48 degrees of freedom
##  Multiple R-Squared: 0.8167,	Adjusted R-squared: 0.767 
##  Wald test: 16.45 on 13 and 48 DF,  p-value: 1.933e-13
## So control of corruption and years since independence both impact the rule of law.

## Now let's see how our full model does when corruption is controlled for.
T6_M4_S2_nmod <- ivreg(gdp_14 ~ dummy_dennis + lat_abst + africa + asia + america + f_french + f_brit
                       + school_mean_14 + ctrl_corr + yrs_ind + rule_law | prienr1900
                       + protmiss + lcapped + lpd1500s + dummy_dennis + lat_abst
                       + africa + asia + america + f_french + f_brit + ctrl_corr
                       + yrs_ind, data=cd)
coeftest(T6_M4_S2_nmod, vcov = vcovHC(T6_M4_S2_nmod, "HC1"))
## Nothing is significant anymore. Let's just test rule of law in the final model.

gdp_rl_S2_nmod <- ivreg(gdp_14 ~ lat_abst + africa + asia + america
                        + f_french + f_brit + rule_law | lcapped + lpd1500s
                        + lat_abst + africa + asia + america + f_french + f_brit 
                        + ctrl_corr + yrs_ind, data=cd)
coeftest(gdp_rl_S2_nmod, vcov = vcovHC(gdp_rl_S2_nmod, "HC1"))
##               Estimate Std. Error t value  Pr(>|t|)    
##  (Intercept)  8.5930209  0.4061195 21.1588 < 2.2e-16 ***
##  lat_abst     0.9225264  1.0825876  0.8521  0.397895    
##  africa      -1.1242519  0.3027185 -3.7139  0.000485 ***
##  asia        -0.0766109  0.4033075 -0.1900  0.850055    
##  america      0.0495014  0.3041083  0.1628  0.871303    
##  f_french     0.0770945  0.4519472  0.1706  0.865189    
##  f_brit       0.2333075  0.2777051  0.8401  0.404542    
##  rule_law     0.0088124  0.0033470  2.6329  0.011017 *

T6_M4_S2_nmod2 <- ivreg(gdp_14 ~ dummy_dennis + lat_abst + africa + asia + america + f_french + f_brit
                        + school_mean_14 + rule_law | prienr1900
                        + protmiss + lcapped + lpd1500s + dummy_dennis + lat_abst
                        + africa + asia + america + f_french + f_brit
                        + yrs_ind, data=cd)
coeftest(T6_M4_S2_nmod2, vcov = vcovHC(T6_M4_S2_nmod2, "HC1"))
## Rule of law actually does worse this way compared to the original model:
##  school_mean_14  0.2197475  0.0951710  2.3090  0.02495 *  
##  rule_law        0.0065468  0.0055051  1.1892  0.23975

gdp_rl_S2_our <- ivreg(gdp_14 ~ lat_abst + africa + asia + america
                       + f_french + f_brit + rule_law | lcapped + lpd1500s
                       + lat_abst + africa + asia + america + f_french + f_brit 
                       , data=cd)
coeftest(gdp_rl_S2_our, vcov = vcovHC(gdp_rl_S2_our, "HC1"))

## Maybe it doesn't make sense to include corruption in this snapshot because it is
## included as a factor in rule of law. Let's try the regression without it.
rule_law_our_ltd <- ivreg(rule_law ~ ctrl_corr + yrs_ind + yrs_war, data=cd)
coeftest(rule_law_our_ltd, vcov = vcovHC(rule_law_our_ltd, "HC1"))
summary(rule_law_our_ltd)

## Now check for outcomes wrt hdi, gini, etc.
T6_M4_S2_yrs_war <- ivreg(yrs_war ~ dummy_dennis + lat_abst + africa + asia + america + f_french + f_brit
                          + school_mean_14 + rule_law | prienr1900 + protmiss + lcapped
                          + lpd1500s + dummy_dennis + lat_abst + africa + asia + america + f_french + f_brit,
                          data=cd)
coeftest(T6_M4_S2_yrs_war, vcov = vcovHC(T6_M4_S2_yrs_war, "HC1"))
## We find that rule of law predicts war better than education. (Maybe significant)
##                  Estimate Std. Error t value Pr(>|t|)  
##  (Intercept)    11.578594   6.656278  1.7395  0.08786 .
##  dummy_dennis    5.382686   5.362664  1.0037  0.32016  
##  lat_abst        7.717009   6.984289  1.1049  0.27429  
##  africa          3.790917   5.157719  0.7350  0.46564  
##  asia            7.843181   4.610083  1.7013  0.09486 .
##  america        -4.277449   3.892542 -1.0989  0.27688  
##  f_french       -8.990310   4.236481 -2.1221  0.03861 *
##  f_brit         -4.287187   4.364824 -0.9822  0.33054  
##  school_mean_14  0.055790   1.080581  0.0516  0.95902  
##  rule_law       -0.105492   0.053031 -1.9893  0.05194 

T6_M4_S2_hdi_14 <- ivreg(hdi_14 ~ dummy_dennis + lat_abst + africa + asia + america + f_french + f_brit
                         + school_mean_14 + rule_law | prienr1900 + protmiss + lcapped
                         + lpd1500s + dummy_dennis + lat_abst + africa + asia + america + f_french + f_brit,
                         data=cd)
coeftest(T6_M4_S2_hdi_14, vcov = vcovHC(T6_M4_S2_hdi_14, "HC1"))
## Our argument doesn't make sense for HDI since it takes education directly into
## account. As expected, we got that education is significant, whereas rule of law
## is not. We can try life expectancy though:

T6_M4_S2_life_exp_14 <- ivreg(life_exp_14 ~ dummy_dennis + lat_abst + africa + asia + america + f_french + f_brit
                              + school_mean_14 + rule_law | prienr1900 + protmiss + lcapped
                              + lpd1500s + dummy_dennis + lat_abst + africa + asia + america + f_french + f_brit,
                              data=cd)
coeftest(T6_M4_S2_life_exp_14, vcov = vcovHC(T6_M4_S2_life_exp_14, "HC1"))
## This one is not helpful either.
##                  Estimate Std. Error t value  Pr(>|t|)    
##  (Intercept)    66.372998   6.403088 10.3658 2.962e-14 ***
##  dummy_dennis    3.856397   6.860538  0.5621  0.576454    
##  lat_abst       13.835832   6.740646  2.0526  0.045160 *  
##  africa         -9.860906   3.138946 -3.1415  0.002773 ** 
##  asia            0.553648   2.877081  0.1924  0.848152    
##  america        -2.138001   2.443797 -0.8749  0.385668    
##  f_french        0.192473   2.661985  0.0723  0.942637    
##  f_brit         -3.629543   2.190083 -1.6573  0.103488    
##  school_mean_14  1.083947   0.716407  1.5130  0.136326    
##  rule_law       -0.021377   0.048141 -0.4441  0.658848

T6_M4_S2_life_exp_14 <- ivreg(life_exp_14 ~ dummy_dennis + lat_abst + africa + asia + america + f_french + f_brit
                              + school_mean_14 + rule_law | prienr1900 + protmiss + lcapped
                              + lpd1500s + dummy_dennis + lat_abst + africa + asia + america + f_french + f_brit,
                              data=cd)
coeftest(T6_M4_S2_life_exp_14, vcov = vcovHC(T6_M4_S2_life_exp_14, "HC1"))
## Not helpful. Ed is more important but neither is significant. And relationship
## is opposite what we would want.

T6_M4_S2_polity_score <- ivreg(polity_score ~ dummy_dennis + lat_abst + africa + asia + america + f_french + f_brit
                               + school_mean_14 + rule_law | prienr1900 + protmiss + lcapped
                               + lpd1500s + dummy_dennis + lat_abst + africa + asia + america + f_french + f_brit,
                               data=cd)
coeftest(T6_M4_S2_polity_score, vcov = vcovHC(T6_M4_S2_polity_score, "HC1"))
## Wow not helpful here either. Wrong direction relationship, not significant, and
## education more important.


T6_M4_S2_evi <- ivreg(evi ~ ave_m_elev + coast_area_ratio_m_km2 + dummy_dennis + lat_abst + africa + asia + america + f_french + f_brit
                      + school_mean_14 + rule_law | prienr1900 + protmiss + lcapped
                      + lpd1500s + ave_m_elev + coast_area_ratio_m_km2 + dummy_dennis + lat_abst + africa + asia + america + f_french + f_brit,
                      data=cd)
coeftest(T6_M4_S2_evi, vcov = vcovHC(T6_M4_S2_evi, "HC1"))
## Nothing useful here.

T6_M4_S2_gini_coeff <- ivreg(gini_coeff ~ gini_year + gini_source + dummy_dennis + lat_abst + africa + asia + america + f_french + f_brit
                             + school_mean_14 + rule_law | prienr1900 + protmiss + lcapped
                             + lpd1500s + gini_year + gini_source + dummy_dennis + lat_abst + africa + asia + america + f_french + f_brit,
                             data=cd)
coeftest(T6_M4_S2_gini_coeff, vcov = vcovHC(T6_M4_S2_gini_coeff, "HC1"))
## Rule of law might be statistically significant if we correct the error terms.
##                   Estimate Std. Error t value Pr(>|t|)  
##  (Intercept)     22.446088  14.521388  1.5457  0.12848  
##  gini_year        0.085801   0.455268  0.1885  0.85128  
##  gini_source     -0.913304   3.135049 -0.2913  0.77201  
##  dummy_dennis   -12.704147  10.895837 -1.1660  0.24916  
##  lat_abst       -19.353499  10.865447 -1.7812  0.08096 .
##  africa           7.643428   7.702501  0.9923  0.32581  
##  asia             4.888647   8.077699  0.6052  0.54778  
##  america         12.991137   7.188269  1.8073  0.07674 .
##  f_french         4.599446   2.585420  1.7790  0.08132 .
##  f_brit           0.464357   2.266504  0.2049  0.83850  
##  school_mean_14   0.744770   1.364627  0.5458  0.58765  
##  rule_law         0.140040   0.078938  1.7740  0.08214 .

T6_M4_S2_gini_coeff_lim <- ivreg(gini_coeff ~ dummy_dennis + lat_abst + africa + asia + america + f_french + f_brit
                                 + school_mean_14 + rule_law | prienr1900 + protmiss + lcapped
                                 + lpd1500s + dummy_dennis + lat_abst + africa + asia + america + f_french + f_brit,
                                 data=cd)
coeftest(T6_M4_S2_gini_coeff_lim, vcov = vcovHC(T6_M4_S2_gini_coeff_lim, "HC1"))

## Let's try corruption as the endogenous variable instead of rule of law.
T6_M4_S2_corr_en <- ivreg(gdp_14 ~ dummy_dennis + lat_abst + africa + asia + america + f_french + f_brit
                          + school_mean_14 + ctrl_corr | prienr1900 + protmiss + lcapped
                          + lpd1500s + dummy_dennis + lat_abst + africa + asia + america + f_french + f_brit,
                          data=cd)
coeftest(T6_M4_S2_corr_en, vcov = vcovHC(T6_M4_S2_corr_en, "HC1"))
## It was not statistically significant (p=.6).

## Stata:  ivreg2 logpgdp05 (ruleof=  lcapped lpd1500) dummy_den  prienr1900
##         protmiss lat_abs africa america asia f_fr 
T5_M8_S2_authors <- ivreg(logpgdp05 ~ dummy_dennis + lat_abst + africa + asia
                          + america + f_french + f_brit + protmiss + prienr1900
                          + ruleoflaw | lcapped + lpd1500s + protmiss + prienr1900
                          + dummy_dennis + lat_abst + africa + asia + america
                          + f_french + f_brit, data=cd)
coeftest(T5_M8_S2_authors, vcov = vcovHC(T5_M8_S2_authors, "HC1"))
## ruleoflaw     1.4442249  0.4758184  3.0352  0.003778 ** (close!)
summary(T5_M8_S2_authors)

T5_M7_S2_authors <- ivreg(logpgdp05 ~ dummy_dennis + lat_abst + africa + asia
                          + america + protmiss + prienr1900
                          + ruleoflaw | lcapped + lpd1500s + protmiss + prienr1900
                          + dummy_dennis + lat_abst + africa + asia + america,
                          data=cd)
coeftest(T5_M7_S2_authors, vcov = vcovHC(T5_M7_S2_authors, "HC1"))
## Robust standard errors
# residual vector
u <- matrix(resid(T5_M7_S2_authors))
# get X matrix/predictors
X <- model.matrix(T5_M7_S2_authors)
# number of obs
n <- dim(X)[1]
# n of predictors
k <- dim(X)[2]
# meat part Sigma is a diagonal with u^2 as elements
meat1 <- t(X) %*% diag(diag(crossprod(t(u)))) %*% X
# degrees of freedom adjust
dfc <- n/(n-k)  
# Robust standard errors
rse <- sqrt(dfc*diag(solve(crossprod(X)) %*% meat1 %*% solve(crossprod(X))))
rse
## ruleoflaw: 0.521830545
